/* uipc_socket.c revision 38705 */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $Id: uipc_socket.c,v 1.44 1998/08/31 15:34:55 wollman Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <vm/vm_zone.h>

#include <machine/limits.h>

/* Zone from which all struct socket allocations are made. */
struct vm_zone *socket_zone;
so_gen_t so_gencnt;		/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

/* Upper bound on the listen(2) backlog; tunable via kern.ipc.somaxconn. */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 *
 * Returns a zeroed socket stamped with the next generation count,
 * or NULL if the zone allocation fails.
 */
struct socket *
soalloc(waitok)
	int waitok;
{
	struct socket *so;

	so = zalloci(socket_zone);
	if (so) {
		/* XXX race condition for reentrant kernel */
		bzero(so, sizeof *so);
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}
	return so;
}

/*
 * Create a new socket of the given domain/type/protocol and attach it
 * to its protocol (pru_attach).  On success *aso holds the new socket.
 * `p' may be NULL when called from within the kernel with no process
 * context; in that case no credential is recorded and soalloc is told
 * not to wait.
 */
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Attach failed; mark the socket as having no file
		 * descriptor reference so sofree() will release it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * Bind a local address to a socket.  The work is done by the
 * protocol's pru_bind at splnet.
 */
int
sobind(so, nam, p)
	struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	splx(s);
	return (error);
}

/*
 * Return a socket structure to its zone.  The generation count is
 * bumped so that stale references (e.g. from sysctl pcb lists) can
 * be detected.
 */
void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;
	zfreei(so->so_zone, so);
}

/*
 * Prepare a socket to accept connections: notify the protocol
 * (pru_listen), mark the socket SO_ACCEPTCONN, and clamp the
 * requested backlog to [0, somaxconn].
 */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	/*
	 * Only set SO_ACCEPTCONN while the completed-connection queue
	 * is empty — presumably to avoid re-flagging a socket already
	 * accepting connections (NOTE(review): confirm intent).
	 */
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Release a socket when it has neither a PCB nor a file-descriptor
 * reference.  If the socket is still on a listening socket's
 * incomplete/complete queue it is unlinked first; then both socket
 * buffers are torn down and the structure is freed.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	/* Still referenced by a PCB or a file descriptor: not yet. */
	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/*
		 * Abort every pending connection, both not-yet-complete
		 * and completed-but-not-accepted.  The next pointer is
		 * saved before soabort() since that may free `sp'.
		 */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket: don't wait for the drain. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Sleep until disconnect completes or the linger
			 * interval (so_linger, in ticks here) expires;
			 * interruptible by signals (PCATCH).
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		/* Preserve the first error seen. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

/*
 * Accept a connection on socket `so'; on return *nam holds the peer
 * address (allocated by the protocol).  The caller must hold the sole
 * (file-descriptor-less) reference, asserted via SS_NOFDREF.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

/*
 * Initiate a connection to address `nam'.  Refused on listening
 * sockets.  See the in-body comment for reconnect semantics on
 * datagram sockets.
 */
int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}

/*
 * Connect two sockets to each other (socketpair-style); the protocol's
 * pru_connect2 does the actual splice.
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

/*
 * Begin disconnecting a connected socket.  ENOTCONN if it was never
 * connected, EALREADY if a disconnect is already in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

/* Map MSG_DONTWAIT onto the sockbuf-lock wait flag. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, p)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct proc *p;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * MSG_DONTROUTE is honored per-request only for atomic
	 * protocols; it is implemented by toggling SO_DONTROUTE
	 * around the pru_send call below.
	 */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Record the error, drop splnet, and bail to the unlock path. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/*
		 * Allow some extra room for out-of-band data —
		 * presumably so urgent data can be queued even when
		 * the buffer is nominally full (TODO confirm).
		 */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF : 0,
			top, addr, control, p);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /*
		     * Ownership of top/control passed to the protocol;
		     * clear them so the cleanup at `out' doesn't free
		     * them again.
		     */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data is fetched directly from the protocol. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A pending OOB mark or record end lets us proceed. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* Leading MT_SONAME mbuf carries the sender's address. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Then any ancillary-data (MT_CONTROL) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/*
				 * Passed file descriptors must be
				 * converted to this process's table
				 * entries (dom_externalize).
				 */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop over the data mbufs of this record. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Don't read past the out-of-band mark in one go. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed this mbuf entirely. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: adjust in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, no error — per comment above. */
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Atomic protocols: drop the unread remainder of the record. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/*
	 * If nothing was transferred and nothing terminal happened,
	 * go around again rather than returning an empty result.
	 */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down one or both halves of a connection.  `how' is the
 * shutdown(2) argument (0/1/2); the increment maps it onto the
 * FREAD/FWRITE bit pair.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Flush the receive buffer: mark the socket unable to receive more,
 * detach the current sockbuf contents under splimp, and dispose of
 * them (externalized rights are cleaned up via dom_dispose).
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Don't let the lock acquisition be interrupted by signals. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot the sockbuf, then empty the live one atomically. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct sockopt *sopt;
	void *buf;
	size_t len;
	size_t minlen;
{
	size_t valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	/* sopt_p non-NULL means the value lives in user space. */
	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}

/*
 * setsockopt(2) back end.  SOL_SOCKET options are handled here;
 * everything else (and, on success, socket-level options too, so the
 * protocol can take note) is forwarded to pr_ctloutput.
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	short val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/* Simple boolean options stored directly in so_options. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/*
			 * Timeouts are stored in ticks in a short
			 * (sb_timeo); reject values that would overflow.
			 */
			if (tv.tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv.tv_sec * hz + tv.tv_usec / tick;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Let the protocol observe socket-level option changes. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct sockopt *sopt;
	void *buf;
	size_t len;
{
	int error;
	size_t valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}

/*
 * getsockopt(2) back end for SOL_SOCKET options; non-socket levels
 * are forwarded to pr_ctloutput.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Common exit for all int-valued options. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading SO_ERROR clears the pending error. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			/* Convert stored ticks back to a timeval. */
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

/*
 * Notify the owning process or process group (SIGURG) and any
 * selectors that out-of-band data has arrived on `so'.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	/* Negative so_pgid means a process group, positive a process. */
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * poll(2) back end: report readable/writable/exceptional conditions,
 * and record the selecting process when no event is pending yet.
 */
int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		/* Nothing ready: register for wakeup on either buffer. */
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(p, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}