	zfreei(so->so_zone, so);
}

/*
 * Prepare a socket to accept incoming connections.  The protocol is
 * notified first via pru_listen; on success SO_ACCEPTCONN is set
 * (unless connections are already queued on so_comp) and the queue
 * limit is forced into range — out-of-range backlogs become somaxconn.
 */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only flip into listening state if the accept queue is empty. */
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Release a socket once it has no protocol control block and no file
 * descriptor reference.  If it is still on a listening socket's
 * incomplete queue it is dequeued first; a socket on the completed
 * (accept) queue is deliberately left alone.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	/* Not yet dead: still has a pcb, or a file descriptor reference. */
	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
	/* Drop buffered data on both sides, then free the socket itself. */
	sbrelease(&so->so_snd);
	sorflush(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	funsetown(so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/*
		 * A listening socket: abort every connection still
		 * queued.  soabort() may free sp, so the successor is
		 * captured before each call.
		 */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket mid-disconnect: don't wait. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Linger: sleep until the disconnect completes,
			 * bounded by so_linger seconds; a signal or the
			 * timeout (nonzero tsleep return) ends the wait.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach from the protocol; keep the first error seen. */
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
/* Abort a pending connection: straight pass-through to pru_abort. */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

/*
 * Complete an accept(2): transfer the file-descriptor reference to the
 * socket and ask the protocol for the peer address.  If the peer has
 * already disconnected, succeed anyway but return no address.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0)
		error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	else {
		if (nam)
			*nam = 0;
		error = 0;
	}
	splx(s);
	return (error);
}

/* Initiate a connection to `nam'; listening sockets cannot connect. */
int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}

/* Connect a pair of sockets to each other (socketpair-style). */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

/* Begin tearing down an established connection via pru_disconnect. */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

/* sblock() wait flag: don't sleep for the sockbuf lock on MSG_DONTWAIT. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, p)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct proc *p;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit while the sockbuf lock is held and splnet is raised. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		/* Not enough room: block (or fail for non-blocking I/O). */
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* Build the chain: header mbuf first. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE) {
					/* Worth a cluster; fall back if none. */
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					len = min(min(mlen, resid), space);
				} else {
nopages:
					len = min(min(mlen, resid), space);
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			/*
			 * XXX all the SS_CANTSENDMORE checks previously
			 * done could be out of date.  We could have received
			 * a reset packet in an interrupt or maybe we slept
			 * while doing page faults in uiomove() etc. We could
			 * probably recheck again inside the splnet() protection
			 * here, but there are probably other places that this
			 * also happens.  We must rethink this.
			 */
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    (flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, control, p);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* Ownership of top/control passed to the protocol. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data comes from the protocol, not the sockbuf. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			/* Deliver buffered data before reporting the error. */
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* An OOB mark or record end lets us return what we have. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the sender's address. */
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Then any ancillary (MT_CONTROL) mbufs, e.g. SCM_RIGHTS. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop over the data mbufs of the current record. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("receive 3"));
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Don't read past the out-of-band mark. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed this whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Per the comment above: short count, no error. */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Atomic protocol: discard the unread tail of the record. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/* Nothing consumed and stream still open: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * shutdown(2) back end.  The increment maps the 0-based `how'
 * argument onto the FREAD/FWRITE bits so each half can be tested.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Flush the receive side of a socket: mark it unable to receive more,
 * snapshot the sockbuf, zero the live one, and dispose of the snapshot
 * (externalized rights first, if the domain has a dispose hook).
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Don't let the lock acquisition be interrupted by signals. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Work on a private copy so the live sockbuf is instantly empty. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 *
to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
/*
 * Copy a setsockopt(2) value into the kernel: copyin() when a process
 * pointer is present, bcopy() for kernel callers.  Requires at least
 * `minlen' bytes; silently truncates anything beyond `len'.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct sockopt *sopt;
	void *buf;
	size_t len;
	size_t minlen;
{
	size_t valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}

/*
 * setsockopt(2) back end.  Non-SOL_SOCKET levels are forwarded to the
 * protocol's pr_ctloutput; socket-level options are handled here and,
 * on success, also offered to the protocol (its result discarded).
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	u_long val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/* Boolean options stored directly in so_options. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* Convert the timeval into ticks, bounded by SHRT_MAX. */
			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct sockopt *sopt;
	void *buf;
	size_t len;
{
	int error;
	size_t valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		/* copyout() for user callers, bcopy() for kernel callers. */
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}

/*
 * getsockopt(2) back end.  Non-SOL_SOCKET levels go to the protocol's
 * pr_ctloutput; socket-level options are answered from the socket and
 * sockbuf fields here.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/* Boolean options are read straight out of so_options. */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Common exit for all options returning a plain int. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert ticks back into a timeval for the caller. */
			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

/*
 * Out-of-band data has arrived: signal the socket's owner (SIGURG)
 * and wake up anyone selecting/polling on the receive side.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * poll(2) back end: report readiness for the requested events and,
 * when nothing is ready, record the selecting process on the
 * relevant sockbuf(s).
 */
int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(p, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}
	zfreei(so->so_zone, so);
}

/* Mark the socket as a listener; out-of-range backlogs become somaxconn. */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only become a listener if the accept queue is empty. */
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/* Free a socket with no pcb and no fd reference; dequeue from so_incomp. */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	funsetown(so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/* Abort everything still queued on a listening socket. */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* Linger up to so_linger seconds for the disconnect. */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
/* Abort a pending connection via the protocol's pru_abort. */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

/* Accept: transfer the fd reference and fetch the peer address. */
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0)
		error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	else {
		/* Peer already gone: succeed with no address. */
		if (nam)
			*nam = 0;
		error = 0;
	}
	splx(s);
	return (error);
}

/* Initiate a connection; listening sockets cannot connect. */
int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}

/* Connect a pair of sockets to each other (socketpair-style). */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

/* Begin tearing down an established connection. */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

/* sblock() wait flag: don't sleep for the lock on MSG_DONTWAIT. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ?
M_NOWAIT : M_WAITOK) 381/* 382 * Send on a socket. 383 * If send must go all at once and message is larger than 384 * send buffering, then hard error. 385 * Lock against other senders. 386 * If must go all at once and not enough room now, then 387 * inform user that this would block and do nothing. 388 * Otherwise, if nonblocking, send as much as possible. 389 * The data to be sent is described by "uio" if nonzero, 390 * otherwise by the mbuf chain "top" (which must be null 391 * if uio is not). Data provided in mbuf chain must be small 392 * enough to send all at once. 393 * 394 * Returns nonzero on error, timeout or signal; callers 395 * must check for short counts if EINTR/ERESTART are returned. 396 * Data and control buffers are freed on return. 397 */ 398int 399sosend(so, addr, uio, top, control, flags, p) 400 register struct socket *so; 401 struct sockaddr *addr; 402 struct uio *uio; 403 struct mbuf *top; 404 struct mbuf *control; 405 int flags; 406 struct proc *p; 407{ 408 struct mbuf **mp; 409 register struct mbuf *m; 410 register long space, len, resid; 411 int clen = 0, error, s, dontroute, mlen; 412 int atomic = sosendallatonce(so) || top; 413 414 if (uio) 415 resid = uio->uio_resid; 416 else 417 resid = top->m_pkthdr.len; 418 /* 419 * In theory resid should be unsigned. 420 * However, space must be signed, as it might be less than 0 421 * if we over-committed, and we must use a signed comparison 422 * of space and resid. On the other hand, a negative resid 423 * causes us to loop sending 0-length segments to the protocol. 424 * 425 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 426 * type sockets since that's an error. 
427 */ 428 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 429 error = EINVAL; 430 goto out; 431 } 432 433 dontroute = 434 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 435 (so->so_proto->pr_flags & PR_ATOMIC); 436 if (p) 437 p->p_stats->p_ru.ru_msgsnd++; 438 if (control) 439 clen = control->m_len; 440#define snderr(errno) { error = errno; splx(s); goto release; } 441 442restart: 443 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 444 if (error) 445 goto out; 446 do { 447 s = splnet(); 448 if (so->so_state & SS_CANTSENDMORE) 449 snderr(EPIPE); 450 if (so->so_error) { 451 error = so->so_error; 452 so->so_error = 0; 453 splx(s); 454 goto release; 455 } 456 if ((so->so_state & SS_ISCONNECTED) == 0) { 457 /* 458 * `sendto' and `sendmsg' is allowed on a connection- 459 * based socket if it supports implied connect. 460 * Return ENOTCONN if not connected and no address is 461 * supplied. 462 */ 463 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 464 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 465 if ((so->so_state & SS_ISCONFIRMING) == 0 && 466 !(resid == 0 && clen != 0)) 467 snderr(ENOTCONN); 468 } else if (addr == 0) 469 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 470 ENOTCONN : EDESTADDRREQ); 471 } 472 space = sbspace(&so->so_snd); 473 if (flags & MSG_OOB) 474 space += 1024; 475 if ((atomic && resid > so->so_snd.sb_hiwat) || 476 clen > so->so_snd.sb_hiwat) 477 snderr(EMSGSIZE); 478 if (space < resid + clen && uio && 479 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 480 if (so->so_state & SS_NBIO) 481 snderr(EWOULDBLOCK); 482 sbunlock(&so->so_snd); 483 error = sbwait(&so->so_snd); 484 splx(s); 485 if (error) 486 goto out; 487 goto restart; 488 } 489 splx(s); 490 mp = ⊤ 491 space -= clen; 492 do { 493 if (uio == NULL) { 494 /* 495 * Data is prepackaged in "top". 
496 */ 497 resid = 0; 498 if (flags & MSG_EOR) 499 top->m_flags |= M_EOR; 500 } else do { 501 if (top == 0) { 502 MGETHDR(m, M_WAIT, MT_DATA); 503 mlen = MHLEN; 504 m->m_pkthdr.len = 0; 505 m->m_pkthdr.rcvif = (struct ifnet *)0; 506 } else { 507 MGET(m, M_WAIT, MT_DATA); 508 mlen = MLEN; 509 } 510 if (resid >= MINCLSIZE) { 511 MCLGET(m, M_WAIT); 512 if ((m->m_flags & M_EXT) == 0) 513 goto nopages; 514 mlen = MCLBYTES; 515 len = min(min(mlen, resid), space); 516 } else { 517nopages: 518 len = min(min(mlen, resid), space); 519 /* 520 * For datagram protocols, leave room 521 * for protocol headers in first mbuf. 522 */ 523 if (atomic && top == 0 && len < mlen) 524 MH_ALIGN(m, len); 525 } 526 space -= len; 527 error = uiomove(mtod(m, caddr_t), (int)len, uio); 528 resid = uio->uio_resid; 529 m->m_len = len; 530 *mp = m; 531 top->m_pkthdr.len += len; 532 if (error) 533 goto release; 534 mp = &m->m_next; 535 if (resid <= 0) { 536 if (flags & MSG_EOR) 537 top->m_flags |= M_EOR; 538 break; 539 } 540 } while (space > 0 && atomic); 541 if (dontroute) 542 so->so_options |= SO_DONTROUTE; 543 s = splnet(); /* XXX */ 544 /* 545 * XXX all the SS_CANTSENDMORE checks previously 546 * done could be out of date. We could have recieved 547 * a reset packet in an interrupt or maybe we slept 548 * while doing page faults in uiomove() etc. We could 549 * probably recheck again inside the splnet() protection 550 * here, but there are probably other places that this 551 * also happens. We must rethink this. 552 */ 553 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 554 (flags & MSG_OOB) ? PRUS_OOB : 555 /* 556 * If the user set MSG_EOF, the protocol 557 * understands this flag and nothing left to 558 * send then use PRU_SEND_EOF instead of PRU_SEND. 559 */ 560 ((flags & MSG_EOF) && 561 (so->so_proto->pr_flags & PR_IMPLOPCL) && 562 (resid <= 0)) ? 563 PRUS_EOF : 564 /* If there is more to send set PRUS_MORETOCOME */ 565 (resid > 0 && space > 0) ? 
PRUS_MORETOCOME : 0, 566 top, addr, control, p); 567 splx(s); 568 if (dontroute) 569 so->so_options &= ~SO_DONTROUTE; 570 clen = 0; 571 control = 0; 572 top = 0; 573 mp = ⊤ 574 if (error) 575 goto release; 576 } while (resid && space > 0); 577 } while (resid); 578 579release: 580 sbunlock(&so->so_snd); 581out: 582 if (top) 583 m_freem(top); 584 if (control) 585 m_freem(control); 586 return (error); 587} 588 589/* 590 * Implement receive operations on a socket. 591 * We depend on the way that records are added to the sockbuf 592 * by sbappend*. In particular, each record (mbufs linked through m_next) 593 * must begin with an address if the protocol so specifies, 594 * followed by an optional mbuf or mbufs containing ancillary data, 595 * and then zero or more mbufs of data. 596 * In order to avoid blocking network interrupts for the entire time here, 597 * we splx() while doing the actual copy to user space. 598 * Although the sockbuf is locked, new data may still be appended, 599 * and thus we must maintain consistency of the sockbuf during that time. 600 * 601 * The caller may receive the data as a single mbuf chain by supplying 602 * an mbuf **mp0 for use in returning the chain. The uio is then used 603 * only for the count in uio_resid. 
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	/* Used below to detect a zero-progress pass and retry. */
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band: fetch via the protocol, bypass the sockbuf. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB mark lets us return early. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* Record starts with the sender's address mbuf. */
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Then any ancillary (control) mbufs, before the data proper. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Passed file descriptors must be mapped
				 * into the receiver before delivery. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: one iteration per mbuf (or partial mbuf). */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("receive 3"));
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Never read across the out-of-band mark. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error (see above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram not fully consumed: rest of the record is lost. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/* Nothing was transferred and the socket is still open: retry. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down one or both halves of a connection.  `how' uses the
 * historical 0/1/2 encoding; how+1 maps it onto the FREAD/FWRITE bits.
 * NOTE(review): out-of-range `how' values are not rejected here --
 * confirm that callers validate before entry.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Discard everything in the receive buffer: mark the socket unable to
 * receive further data, snapshot the sockbuf, zero the live one, and
 * dispose of the snapshot (including any in-transit access rights)
 * after dropping back below splimp.  SB_NOINTR makes the sblock()
 * uninterruptible so the flush cannot be abandoned halfway.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 *
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
/*
 * Copy a setsockopt(2) option value into `buf'.  With a process context
 * (sopt_p) the value is copied in from user space; otherwise it is an
 * in-kernel request and a plain bcopy suffices.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct sockopt *sopt;
	void *buf;
	size_t len;
	size_t minlen;
{
	size_t valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}

/*
 * Handle setsockopt(2).  Non-SOL_SOCKET levels are passed straight to
 * the protocol; socket-level options are decoded here and, on success,
 * also offered to the protocol's pr_ctloutput (its result ignored).
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	u_long val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/* Simple boolean options: the option name is its own bit. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* Convert the timeval to ticks; the stored timeout
			 * must fit in a short (hence the SHRT_MAX bounds). */
			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct sockopt *sopt;
	void *buf;
	size_t len;
{
	int error;
	size_t valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}

/*
 * Handle getsockopt(2).  Non-SOL_SOCKET levels go to the protocol;
 * socket-level options are read out here.  The shared `integer:' label
 * copies out whatever int the preceding case left in `optval'.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading SO_ERROR clears the pending error. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			/* Convert the tick count back into a timeval. */
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

/*
 * Out-of-band data has arrived: post SIGURG/SIGIO to the registered
 * owner (if any) and wake anyone selecting on the receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * poll(2) back-end for sockets: report which of the requested events
 * are ready now; if none are, record the selection on the relevant
 * sockbuf(s) so the process is woken when state changes.
 */
int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(p, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}
|