/* uipc_socket.c revision 15701 */
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $Id: uipc_socket.c,v 1.17 1996/04/16 03:50:08 davidg Exp $
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>

/* Maximum listen(2) backlog; run-time tunable via the kern sysctl tree. */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern, KERN_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*
 * Create a socket of the requested type in domain dom and return it
 * via *aso.  The protocol switch entry is looked up by protocol number
 * when proto is nonzero, otherwise by socket type.  A socket created
 * by the superuser starts with SS_PRIV set.  The protocol is given a
 * chance to attach per-connection state via PRU_ATTACH; on failure the
 * half-constructed socket is torn down with sofree().
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_proto = prp;
	error =
	    (*prp->pr_usrreq)(so, PRU_ATTACH,
		(struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
	if (error) {
		/* Mark as unreferenced so sofree() will actually free it. */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * Bind a local name (address) to a socket.  The work is done entirely
 * by the protocol's PRU_BIND entry; nam holds the address as an mbuf.
 */
int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	error =
	    (*so->so_proto->pr_usrreq)(so, PRU_BIND,
		(struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * Prepare a socket to accept connections: notify the protocol via
 * PRU_LISTEN, set SO_ACCEPTCONN (only while the completed-connection
 * queue is still empty), and clamp the backlog to [0, somaxconn].
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splnet(), error;

	error =
	    (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Free a socket that holds neither protocol state (so_pcb) nor a file
 * descriptor reference; otherwise this is a no-op.  If the socket is
 * still on a listening socket's incomplete or completed queue it is
 * unlinked first, then both socket buffers and the socket itself are
 * released.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/* Abort every pending connection, incomplete and completed. */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket: don't wait for the drain. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* Linger: sleep until disconnected or timeout. */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 =
		    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
			(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
		/* Preserve the earlier error, if any. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
230 */ 231int 232soabort(so) 233 struct socket *so; 234{ 235 236 return ( 237 (*so->so_proto->pr_usrreq)(so, PRU_ABORT, 238 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)); 239} 240 241int 242soaccept(so, nam) 243 register struct socket *so; 244 struct mbuf *nam; 245{ 246 int s = splnet(); 247 int error; 248 249 if ((so->so_state & SS_NOFDREF) == 0) 250 panic("soaccept: !NOFDREF"); 251 so->so_state &= ~SS_NOFDREF; 252 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 253 (struct mbuf *)0, nam, (struct mbuf *)0); 254 splx(s); 255 return (error); 256} 257 258int 259soconnect(so, nam) 260 register struct socket *so; 261 struct mbuf *nam; 262{ 263 int s; 264 int error; 265 266 if (so->so_options & SO_ACCEPTCONN) 267 return (EOPNOTSUPP); 268 s = splnet(); 269 /* 270 * If protocol is connection-based, can only connect once. 271 * Otherwise, if connected, try to disconnect first. 272 * This allows user to disconnect by connecting to, e.g., 273 * a null address. 274 */ 275 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 276 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 277 (error = sodisconnect(so)))) 278 error = EISCONN; 279 else 280 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 281 (struct mbuf *)0, nam, (struct mbuf *)0); 282 splx(s); 283 return (error); 284} 285 286int 287soconnect2(so1, so2) 288 register struct socket *so1; 289 struct socket *so2; 290{ 291 int s = splnet(); 292 int error; 293 294 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, 295 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0); 296 splx(s); 297 return (error); 298} 299 300int 301sodisconnect(so) 302 register struct socket *so; 303{ 304 int s = splnet(); 305 int error; 306 307 if ((so->so_state & SS_ISCONNECTED) == 0) { 308 error = ENOTCONN; 309 goto bad; 310 } 311 if (so->so_state & SS_ISDISCONNECTING) { 312 error = EALREADY; 313 goto bad; 314 } 315 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 316 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0); 
317bad: 318 splx(s); 319 return (error); 320} 321 322#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 323/* 324 * Send on a socket. 325 * If send must go all at once and message is larger than 326 * send buffering, then hard error. 327 * Lock against other senders. 328 * If must go all at once and not enough room now, then 329 * inform user that this would block and do nothing. 330 * Otherwise, if nonblocking, send as much as possible. 331 * The data to be sent is described by "uio" if nonzero, 332 * otherwise by the mbuf chain "top" (which must be null 333 * if uio is not). Data provided in mbuf chain must be small 334 * enough to send all at once. 335 * 336 * Returns nonzero on error, timeout or signal; callers 337 * must check for short counts if EINTR/ERESTART are returned. 338 * Data and control buffers are freed on return. 339 */ 340int 341sosend(so, addr, uio, top, control, flags) 342 register struct socket *so; 343 struct mbuf *addr; 344 struct uio *uio; 345 struct mbuf *top; 346 struct mbuf *control; 347 int flags; 348{ 349 struct proc *p = curproc; /* XXX */ 350 struct mbuf **mp; 351 register struct mbuf *m; 352 register long space, len, resid; 353 int clen = 0, error, s, dontroute, mlen; 354 int atomic = sosendallatonce(so) || top; 355 356 if (uio) 357 resid = uio->uio_resid; 358 else 359 resid = top->m_pkthdr.len; 360 /* 361 * In theory resid should be unsigned. 362 * However, space must be signed, as it might be less than 0 363 * if we over-committed, and we must use a signed comparison 364 * of space and resid. On the other hand, a negative resid 365 * causes us to loop sending 0-length segments to the protocol. 
366 */ 367 if (resid < 0) 368 return (EINVAL); 369 dontroute = 370 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 371 (so->so_proto->pr_flags & PR_ATOMIC); 372 p->p_stats->p_ru.ru_msgsnd++; 373 if (control) 374 clen = control->m_len; 375#define snderr(errno) { error = errno; splx(s); goto release; } 376 377restart: 378 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 379 if (error) 380 goto out; 381 do { 382 s = splnet(); 383 if (so->so_state & SS_CANTSENDMORE) 384 snderr(EPIPE); 385 if (so->so_error) 386 snderr(so->so_error); 387 if ((so->so_state & SS_ISCONNECTED) == 0) { 388 /* 389 * `sendto' and `sendmsg' is allowed on a connection- 390 * based socket if it supports implied connect. 391 * Return ENOTCONN if not connected and no address is 392 * supplied. 393 */ 394 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 395 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 396 if ((so->so_state & SS_ISCONFIRMING) == 0 && 397 !(resid == 0 && clen != 0)) 398 snderr(ENOTCONN); 399 } else if (addr == 0) 400 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 401 ENOTCONN : EDESTADDRREQ); 402 } 403 space = sbspace(&so->so_snd); 404 if (flags & MSG_OOB) 405 space += 1024; 406 if ((atomic && resid > so->so_snd.sb_hiwat) || 407 clen > so->so_snd.sb_hiwat) 408 snderr(EMSGSIZE); 409 if (space < resid + clen && uio && 410 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 411 if (so->so_state & SS_NBIO) 412 snderr(EWOULDBLOCK); 413 sbunlock(&so->so_snd); 414 error = sbwait(&so->so_snd); 415 splx(s); 416 if (error) 417 goto out; 418 goto restart; 419 } 420 splx(s); 421 mp = ⊤ 422 space -= clen; 423 do { 424 if (uio == NULL) { 425 /* 426 * Data is prepackaged in "top". 
427 */ 428 resid = 0; 429 if (flags & MSG_EOR) 430 top->m_flags |= M_EOR; 431 } else do { 432 if (top == 0) { 433 MGETHDR(m, M_WAIT, MT_DATA); 434 mlen = MHLEN; 435 m->m_pkthdr.len = 0; 436 m->m_pkthdr.rcvif = (struct ifnet *)0; 437 } else { 438 MGET(m, M_WAIT, MT_DATA); 439 mlen = MLEN; 440 } 441 if (resid >= MINCLSIZE) { 442 MCLGET(m, M_WAIT); 443 if ((m->m_flags & M_EXT) == 0) 444 goto nopages; 445 mlen = MCLBYTES; 446 len = min(min(mlen, resid), space); 447 } else { 448nopages: 449 len = min(min(mlen, resid), space); 450 /* 451 * For datagram protocols, leave room 452 * for protocol headers in first mbuf. 453 */ 454 if (atomic && top == 0 && len < mlen) 455 MH_ALIGN(m, len); 456 } 457 space -= len; 458 error = uiomove(mtod(m, caddr_t), (int)len, uio); 459 resid = uio->uio_resid; 460 m->m_len = len; 461 *mp = m; 462 top->m_pkthdr.len += len; 463 if (error) 464 goto release; 465 mp = &m->m_next; 466 if (resid <= 0) { 467 if (flags & MSG_EOR) 468 top->m_flags |= M_EOR; 469 break; 470 } 471 } while (space > 0 && atomic); 472 if (dontroute) 473 so->so_options |= SO_DONTROUTE; 474 s = splnet(); /* XXX */ 475 error = (*so->so_proto->pr_usrreq)(so, 476 (flags & MSG_OOB) ? PRU_SENDOOB : 477 /* 478 * If the user set MSG_EOF, the protocol 479 * understands this flag and nothing left to 480 * send then use PRU_SEND_EOF instead of PRU_SEND. 481 */ 482 ((flags & MSG_EOF) && 483 (so->so_proto->pr_flags & PR_IMPLOPCL) && 484 (resid <= 0)) ? 485 PRU_SEND_EOF : PRU_SEND, 486 top, addr, control); 487 splx(s); 488 if (dontroute) 489 so->so_options &= ~SO_DONTROUTE; 490 clen = 0; 491 control = 0; 492 top = 0; 493 mp = ⊤ 494 if (error) 495 goto release; 496 } while (resid && space > 0); 497 } while (resid); 498 499release: 500 sbunlock(&so->so_snd); 501out: 502 if (top) 503 m_freem(top); 504 if (control) 505 m_freem(control); 506 return (error); 507} 508 509/* 510 * Implement receive operations on a socket. 
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data is fetched separately via PRU_RCVOOB. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0);

restart:
	/* Serialize against other receivers on this socket buffer. */
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB mark means data is deliverable. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the sender's address. */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Then any ancillary-data (control) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Passed file descriptors need conversion. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: walk the data mbufs of this record. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Don't read past the out-of-band mark in one go. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: adjust in place (or copy for mp). */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Deliberate: short count, no error. */
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram didn't fit: drop the tail of the record. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* Let the protocol know we took data (e.g. window update). */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)flags, (struct mbuf *)0);
	}
	/* Nothing was transferred at all: try again rather than return 0. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection.  how is the shutdown(2)
 * argument (0..2); after how++ it is tested against the FREAD/FWRITE
 * bits (assumes FREAD == 0x1 and FWRITE == 0x2 -- NOTE(review): values
 * come from sys/file.h, not visible here; confirm).
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
	return (0);
}

/*
 * Flush the receive buffer: mark the socket unable to receive more,
 * detach the buffered data under splimp, then dispose of any passed
 * access rights and release the mbufs outside the critical section.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* The lock acquisition below must not be interruptible. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Steal the buffer contents into a local copy, zero the original. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Set a socket option.  Non-SOL_SOCKET levels are passed through to
 * the protocol's ctloutput routine.  The option value arrives in mbuf
 * m0, which is consumed here (or by the protocol) in all cases.
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... (l_onoff is read as the int below) */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			/* Boolean options: the option bit is its own mask. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Reject timeouts that would overflow the short. */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_PRIVSTATE:
			/* we don't care what the parameter is... */
			so->so_state &= ~SS_PRIV;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Get a socket option.  Non-SOL_SOCKET levels are passed through to
 * the protocol; otherwise the value is returned in a freshly
 * allocated mbuf via *mp, which the caller must free.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_PRIVSTATE:
			*mtod(m, int *) = so->so_state & SS_PRIV;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert ticks back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Notify the owning process or process group that out-of-band data
 * has arrived (SIGURG), and wake any select(2) waiters on the
 * receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}