/* uipc_socket.c, revision 33955 */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.37 1998/02/19 19:38:20 fenner Exp $ 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/proc.h> 40#include <sys/fcntl.h> 41#include <sys/malloc.h> 42#include <sys/mbuf.h> 43#include <sys/domain.h> 44#include <sys/kernel.h> 45#include <sys/poll.h> 46#include <sys/protosw.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/resourcevar.h> 50#include <sys/signalvar.h> 51#include <sys/sysctl.h> 52 53#include <machine/limits.h> 54 55MALLOC_DEFINE(M_SOCKET, "socket", "socket structure"); 56MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 57MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 58 59static int somaxconn = SOMAXCONN; 60SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 61 0, ""); 62 63/* 64 * Socket operation routines. 65 * These routines are called by the routines in 66 * sys_socket.c or from a system process, and 67 * implement the semantics of socket operations by 68 * switching out to the protocol specific routines. 
69 */ 70/*ARGSUSED*/ 71int 72socreate(dom, aso, type, proto, p) 73 int dom; 74 struct socket **aso; 75 register int type; 76 int proto; 77 struct proc *p; 78{ 79 register struct protosw *prp; 80 register struct socket *so; 81 register int error; 82 83 if (proto) 84 prp = pffindproto(dom, proto, type); 85 else 86 prp = pffindtype(dom, type); 87 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 88 return (EPROTONOSUPPORT); 89 if (prp->pr_type != type) 90 return (EPROTOTYPE); 91 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); 92 bzero((caddr_t)so, sizeof(*so)); 93 TAILQ_INIT(&so->so_incomp); 94 TAILQ_INIT(&so->so_comp); 95 so->so_type = type; 96 so->so_uid = p->p_ucred->cr_uid;; 97 so->so_proto = prp; 98 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 99 if (error) { 100 so->so_state |= SS_NOFDREF; 101 sofree(so); 102 return (error); 103 } 104 *aso = so; 105 return (0); 106} 107 108int 109sobind(so, nam, p) 110 struct socket *so; 111 struct sockaddr *nam; 112 struct proc *p; 113{ 114 int s = splnet(); 115 int error; 116 117 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 118 splx(s); 119 return (error); 120} 121 122int 123solisten(so, backlog, p) 124 register struct socket *so; 125 int backlog; 126 struct proc *p; 127{ 128 int s = splnet(), error; 129 130 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 131 if (error) { 132 splx(s); 133 return (error); 134 } 135 if (so->so_comp.tqh_first == NULL) 136 so->so_options |= SO_ACCEPTCONN; 137 if (backlog < 0 || backlog > somaxconn) 138 backlog = somaxconn; 139 so->so_qlimit = backlog; 140 splx(s); 141 return (0); 142} 143 144void 145sofree(so) 146 register struct socket *so; 147{ 148 struct socket *head = so->so_head; 149 150 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 151 return; 152 if (head != NULL) { 153 if (so->so_state & SS_INCOMP) { 154 TAILQ_REMOVE(&head->so_incomp, so, so_list); 155 head->so_incqlen--; 156 } else if (so->so_state & SS_COMP) { 157 
TAILQ_REMOVE(&head->so_comp, so, so_list); 158 } else { 159 panic("sofree: not queued"); 160 } 161 head->so_qlen--; 162 so->so_state &= ~(SS_INCOMP|SS_COMP); 163 so->so_head = NULL; 164 } 165 sbrelease(&so->so_snd); 166 sorflush(so); 167 FREE(so, M_SOCKET); 168} 169 170/* 171 * Close a socket on last file table reference removal. 172 * Initiate disconnect if connected. 173 * Free socket when disconnect complete. 174 */ 175int 176soclose(so) 177 register struct socket *so; 178{ 179 int s = splnet(); /* conservative */ 180 int error = 0; 181 182 if (so->so_options & SO_ACCEPTCONN) { 183 struct socket *sp, *sonext; 184 185 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { 186 sonext = sp->so_list.tqe_next; 187 (void) soabort(sp); 188 } 189 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { 190 sonext = sp->so_list.tqe_next; 191 (void) soabort(sp); 192 } 193 } 194 if (so->so_pcb == 0) 195 goto discard; 196 if (so->so_state & SS_ISCONNECTED) { 197 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 198 error = sodisconnect(so); 199 if (error) 200 goto drop; 201 } 202 if (so->so_options & SO_LINGER) { 203 if ((so->so_state & SS_ISDISCONNECTING) && 204 (so->so_state & SS_NBIO)) 205 goto drop; 206 while (so->so_state & SS_ISCONNECTED) { 207 error = tsleep((caddr_t)&so->so_timeo, 208 PSOCK | PCATCH, "soclos", so->so_linger); 209 if (error) 210 break; 211 } 212 } 213 } 214drop: 215 if (so->so_pcb) { 216 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 217 if (error == 0) 218 error = error2; 219 } 220discard: 221 if (so->so_state & SS_NOFDREF) 222 panic("soclose: NOFDREF"); 223 so->so_state |= SS_NOFDREF; 224 sofree(so); 225 splx(s); 226 return (error); 227} 228 229/* 230 * Must be called at splnet... 
231 */ 232int 233soabort(so) 234 struct socket *so; 235{ 236 237 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 238} 239 240int 241soaccept(so, nam) 242 register struct socket *so; 243 struct sockaddr **nam; 244{ 245 int s = splnet(); 246 int error; 247 248 if ((so->so_state & SS_NOFDREF) == 0) 249 panic("soaccept: !NOFDREF"); 250 so->so_state &= ~SS_NOFDREF; 251 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 252 splx(s); 253 return (error); 254} 255 256int 257soconnect(so, nam, p) 258 register struct socket *so; 259 struct sockaddr *nam; 260 struct proc *p; 261{ 262 int s; 263 int error; 264 265 if (so->so_options & SO_ACCEPTCONN) 266 return (EOPNOTSUPP); 267 s = splnet(); 268 /* 269 * If protocol is connection-based, can only connect once. 270 * Otherwise, if connected, try to disconnect first. 271 * This allows user to disconnect by connecting to, e.g., 272 * a null address. 273 */ 274 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 275 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 276 (error = sodisconnect(so)))) 277 error = EISCONN; 278 else 279 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 280 splx(s); 281 return (error); 282} 283 284int 285soconnect2(so1, so2) 286 register struct socket *so1; 287 struct socket *so2; 288{ 289 int s = splnet(); 290 int error; 291 292 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 293 splx(s); 294 return (error); 295} 296 297int 298sodisconnect(so) 299 register struct socket *so; 300{ 301 int s = splnet(); 302 int error; 303 304 if ((so->so_state & SS_ISCONNECTED) == 0) { 305 error = ENOTCONN; 306 goto bad; 307 } 308 if (so->so_state & SS_ISDISCONNECTING) { 309 error = EALREADY; 310 goto bad; 311 } 312 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 313bad: 314 splx(s); 315 return (error); 316} 317 318#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 319/* 320 * Send on a socket. 
321 * If send must go all at once and message is larger than 322 * send buffering, then hard error. 323 * Lock against other senders. 324 * If must go all at once and not enough room now, then 325 * inform user that this would block and do nothing. 326 * Otherwise, if nonblocking, send as much as possible. 327 * The data to be sent is described by "uio" if nonzero, 328 * otherwise by the mbuf chain "top" (which must be null 329 * if uio is not). Data provided in mbuf chain must be small 330 * enough to send all at once. 331 * 332 * Returns nonzero on error, timeout or signal; callers 333 * must check for short counts if EINTR/ERESTART are returned. 334 * Data and control buffers are freed on return. 335 */ 336int 337sosend(so, addr, uio, top, control, flags, p) 338 register struct socket *so; 339 struct sockaddr *addr; 340 struct uio *uio; 341 struct mbuf *top; 342 struct mbuf *control; 343 int flags; 344 struct proc *p; 345{ 346 struct mbuf **mp; 347 register struct mbuf *m; 348 register long space, len, resid; 349 int clen = 0, error, s, dontroute, mlen; 350 int atomic = sosendallatonce(so) || top; 351 352 if (uio) 353 resid = uio->uio_resid; 354 else 355 resid = top->m_pkthdr.len; 356 /* 357 * In theory resid should be unsigned. 358 * However, space must be signed, as it might be less than 0 359 * if we over-committed, and we must use a signed comparison 360 * of space and resid. On the other hand, a negative resid 361 * causes us to loop sending 0-length segments to the protocol. 362 * 363 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 364 * type sockets since that's an error. 
365 */ 366 if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { 367 error = EINVAL; 368 goto out; 369 } 370 371 dontroute = 372 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 373 (so->so_proto->pr_flags & PR_ATOMIC); 374 if (p) 375 p->p_stats->p_ru.ru_msgsnd++; 376 if (control) 377 clen = control->m_len; 378#define snderr(errno) { error = errno; splx(s); goto release; } 379 380restart: 381 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 382 if (error) 383 goto out; 384 do { 385 s = splnet(); 386 if (so->so_state & SS_CANTSENDMORE) 387 snderr(EPIPE); 388 if (so->so_error) { 389 error = so->so_error; 390 so->so_error = 0; 391 splx(s); 392 goto release; 393 } 394 if ((so->so_state & SS_ISCONNECTED) == 0) { 395 /* 396 * `sendto' and `sendmsg' is allowed on a connection- 397 * based socket if it supports implied connect. 398 * Return ENOTCONN if not connected and no address is 399 * supplied. 400 */ 401 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 402 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 403 if ((so->so_state & SS_ISCONFIRMING) == 0 && 404 !(resid == 0 && clen != 0)) 405 snderr(ENOTCONN); 406 } else if (addr == 0) 407 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 408 ENOTCONN : EDESTADDRREQ); 409 } 410 space = sbspace(&so->so_snd); 411 if (flags & MSG_OOB) 412 space += 1024; 413 if ((atomic && resid > so->so_snd.sb_hiwat) || 414 clen > so->so_snd.sb_hiwat) 415 snderr(EMSGSIZE); 416 if (space < resid + clen && uio && 417 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 418 if (so->so_state & SS_NBIO) 419 snderr(EWOULDBLOCK); 420 sbunlock(&so->so_snd); 421 error = sbwait(&so->so_snd); 422 splx(s); 423 if (error) 424 goto out; 425 goto restart; 426 } 427 splx(s); 428 mp = ⊤ 429 space -= clen; 430 do { 431 if (uio == NULL) { 432 /* 433 * Data is prepackaged in "top". 
434 */ 435 resid = 0; 436 if (flags & MSG_EOR) 437 top->m_flags |= M_EOR; 438 } else do { 439 if (top == 0) { 440 MGETHDR(m, M_WAIT, MT_DATA); 441 mlen = MHLEN; 442 m->m_pkthdr.len = 0; 443 m->m_pkthdr.rcvif = (struct ifnet *)0; 444 } else { 445 MGET(m, M_WAIT, MT_DATA); 446 mlen = MLEN; 447 } 448 if (resid >= MINCLSIZE) { 449 MCLGET(m, M_WAIT); 450 if ((m->m_flags & M_EXT) == 0) 451 goto nopages; 452 mlen = MCLBYTES; 453 len = min(min(mlen, resid), space); 454 } else { 455nopages: 456 len = min(min(mlen, resid), space); 457 /* 458 * For datagram protocols, leave room 459 * for protocol headers in first mbuf. 460 */ 461 if (atomic && top == 0 && len < mlen) 462 MH_ALIGN(m, len); 463 } 464 space -= len; 465 error = uiomove(mtod(m, caddr_t), (int)len, uio); 466 resid = uio->uio_resid; 467 m->m_len = len; 468 *mp = m; 469 top->m_pkthdr.len += len; 470 if (error) 471 goto release; 472 mp = &m->m_next; 473 if (resid <= 0) { 474 if (flags & MSG_EOR) 475 top->m_flags |= M_EOR; 476 break; 477 } 478 } while (space > 0 && atomic); 479 if (dontroute) 480 so->so_options |= SO_DONTROUTE; 481 s = splnet(); /* XXX */ 482 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 483 (flags & MSG_OOB) ? PRUS_OOB : 484 /* 485 * If the user set MSG_EOF, the protocol 486 * understands this flag and nothing left to 487 * send then use PRU_SEND_EOF instead of PRU_SEND. 488 */ 489 ((flags & MSG_EOF) && 490 (so->so_proto->pr_flags & PR_IMPLOPCL) && 491 (resid <= 0)) ? 492 PRUS_EOF : 0, 493 top, addr, control, p); 494 splx(s); 495 if (dontroute) 496 so->so_options &= ~SO_DONTROUTE; 497 clen = 0; 498 control = 0; 499 top = 0; 500 mp = ⊤ 501 if (error) 502 goto release; 503 } while (resid && space > 0); 504 } while (resid); 505 506release: 507 sbunlock(&so->so_snd); 508out: 509 if (top) 510 m_freem(top); 511 if (control) 512 m_freem(control); 513 return (error); 514} 515 516/* 517 * Implement receive operations on a socket. 
518 * We depend on the way that records are added to the sockbuf 519 * by sbappend*. In particular, each record (mbufs linked through m_next) 520 * must begin with an address if the protocol so specifies, 521 * followed by an optional mbuf or mbufs containing ancillary data, 522 * and then zero or more mbufs of data. 523 * In order to avoid blocking network interrupts for the entire time here, 524 * we splx() while doing the actual copy to user space. 525 * Although the sockbuf is locked, new data may still be appended, 526 * and thus we must maintain consistency of the sockbuf during that time. 527 * 528 * The caller may receive the data as a single mbuf chain by supplying 529 * an mbuf **mp0 for use in returning the chain. The uio is then used 530 * only for the count in uio_resid. 531 */ 532int 533soreceive(so, psa, uio, mp0, controlp, flagsp) 534 register struct socket *so; 535 struct sockaddr **psa; 536 struct uio *uio; 537 struct mbuf **mp0; 538 struct mbuf **controlp; 539 int *flagsp; 540{ 541 register struct mbuf *m, **mp; 542 register int flags, len, error, s, offset; 543 struct protosw *pr = so->so_proto; 544 struct mbuf *nextrecord; 545 int moff, type = 0; 546 int orig_resid = uio->uio_resid; 547 548 mp = mp0; 549 if (psa) 550 *psa = 0; 551 if (controlp) 552 *controlp = 0; 553 if (flagsp) 554 flags = *flagsp &~ MSG_EOR; 555 else 556 flags = 0; 557 if (flags & MSG_OOB) { 558 m = m_get(M_WAIT, MT_DATA); 559 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 560 if (error) 561 goto bad; 562 do { 563 error = uiomove(mtod(m, caddr_t), 564 (int) min(uio->uio_resid, m->m_len), uio); 565 m = m_free(m); 566 } while (uio->uio_resid && error == 0 && m); 567bad: 568 if (m) 569 m_freem(m); 570 return (error); 571 } 572 if (mp) 573 *mp = (struct mbuf *)0; 574 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 575 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 576 577restart: 578 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 579 if (error) 580 return (error); 
581 s = splnet(); 582 583 m = so->so_rcv.sb_mb; 584 /* 585 * If we have less data than requested, block awaiting more 586 * (subject to any timeout) if: 587 * 1. the current count is less than the low water mark, or 588 * 2. MSG_WAITALL is set, and it is possible to do the entire 589 * receive operation at once if we block (resid <= hiwat). 590 * 3. MSG_DONTWAIT is not set 591 * If MSG_WAITALL is set but resid is larger than the receive buffer, 592 * we have to do the receive in sections, and thus risk returning 593 * a short count if a timeout or signal occurs after we start. 594 */ 595 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 596 so->so_rcv.sb_cc < uio->uio_resid) && 597 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 598 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 599 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 600#ifdef DIAGNOSTIC 601 if (m == 0 && so->so_rcv.sb_cc) 602 panic("receive 1"); 603#endif 604 if (so->so_error) { 605 if (m) 606 goto dontblock; 607 error = so->so_error; 608 if ((flags & MSG_PEEK) == 0) 609 so->so_error = 0; 610 goto release; 611 } 612 if (so->so_state & SS_CANTRCVMORE) { 613 if (m) 614 goto dontblock; 615 else 616 goto release; 617 } 618 for (; m; m = m->m_next) 619 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 620 m = so->so_rcv.sb_mb; 621 goto dontblock; 622 } 623 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 624 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 625 error = ENOTCONN; 626 goto release; 627 } 628 if (uio->uio_resid == 0) 629 goto release; 630 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 631 error = EWOULDBLOCK; 632 goto release; 633 } 634 sbunlock(&so->so_rcv); 635 error = sbwait(&so->so_rcv); 636 splx(s); 637 if (error) 638 return (error); 639 goto restart; 640 } 641dontblock: 642 if (uio->uio_procp) 643 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 644 nextrecord = m->m_nextpkt; 645 if (pr->pr_flags & PR_ADDR) { 646#ifdef DIAGNOSTIC 647 if 
(m->m_type != MT_SONAME) 648 panic("receive 1a"); 649#endif 650 orig_resid = 0; 651 if (psa) 652 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 653 mp0 == 0); 654 if (flags & MSG_PEEK) { 655 m = m->m_next; 656 } else { 657 sbfree(&so->so_rcv, m); 658 MFREE(m, so->so_rcv.sb_mb); 659 m = so->so_rcv.sb_mb; 660 } 661 } 662 while (m && m->m_type == MT_CONTROL && error == 0) { 663 if (flags & MSG_PEEK) { 664 if (controlp) 665 *controlp = m_copy(m, 0, m->m_len); 666 m = m->m_next; 667 } else { 668 sbfree(&so->so_rcv, m); 669 if (controlp) { 670 if (pr->pr_domain->dom_externalize && 671 mtod(m, struct cmsghdr *)->cmsg_type == 672 SCM_RIGHTS) 673 error = (*pr->pr_domain->dom_externalize)(m); 674 *controlp = m; 675 so->so_rcv.sb_mb = m->m_next; 676 m->m_next = 0; 677 m = so->so_rcv.sb_mb; 678 } else { 679 MFREE(m, so->so_rcv.sb_mb); 680 m = so->so_rcv.sb_mb; 681 } 682 } 683 if (controlp) { 684 orig_resid = 0; 685 controlp = &(*controlp)->m_next; 686 } 687 } 688 if (m) { 689 if ((flags & MSG_PEEK) == 0) 690 m->m_nextpkt = nextrecord; 691 type = m->m_type; 692 if (type == MT_OOBDATA) 693 flags |= MSG_OOB; 694 } 695 moff = 0; 696 offset = 0; 697 while (m && uio->uio_resid > 0 && error == 0) { 698 if (m->m_type == MT_OOBDATA) { 699 if (type != MT_OOBDATA) 700 break; 701 } else if (type == MT_OOBDATA) 702 break; 703#ifdef DIAGNOSTIC 704 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 705 panic("receive 3"); 706#endif 707 so->so_state &= ~SS_RCVATMARK; 708 len = uio->uio_resid; 709 if (so->so_oobmark && len > so->so_oobmark - offset) 710 len = so->so_oobmark - offset; 711 if (len > m->m_len - moff) 712 len = m->m_len - moff; 713 /* 714 * If mp is set, just pass back the mbufs. 715 * Otherwise copy them out via the uio, then free. 716 * Sockbuf must be consistent here (points to current mbuf, 717 * it points to next record) when we drop priority; 718 * we must note any additions to the sockbuf when we 719 * block interrupts again. 
720 */ 721 if (mp == 0) { 722 splx(s); 723 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 724 s = splnet(); 725 if (error) 726 goto release; 727 } else 728 uio->uio_resid -= len; 729 if (len == m->m_len - moff) { 730 if (m->m_flags & M_EOR) 731 flags |= MSG_EOR; 732 if (flags & MSG_PEEK) { 733 m = m->m_next; 734 moff = 0; 735 } else { 736 nextrecord = m->m_nextpkt; 737 sbfree(&so->so_rcv, m); 738 if (mp) { 739 *mp = m; 740 mp = &m->m_next; 741 so->so_rcv.sb_mb = m = m->m_next; 742 *mp = (struct mbuf *)0; 743 } else { 744 MFREE(m, so->so_rcv.sb_mb); 745 m = so->so_rcv.sb_mb; 746 } 747 if (m) 748 m->m_nextpkt = nextrecord; 749 } 750 } else { 751 if (flags & MSG_PEEK) 752 moff += len; 753 else { 754 if (mp) 755 *mp = m_copym(m, 0, len, M_WAIT); 756 m->m_data += len; 757 m->m_len -= len; 758 so->so_rcv.sb_cc -= len; 759 } 760 } 761 if (so->so_oobmark) { 762 if ((flags & MSG_PEEK) == 0) { 763 so->so_oobmark -= len; 764 if (so->so_oobmark == 0) { 765 so->so_state |= SS_RCVATMARK; 766 break; 767 } 768 } else { 769 offset += len; 770 if (offset == so->so_oobmark) 771 break; 772 } 773 } 774 if (flags & MSG_EOR) 775 break; 776 /* 777 * If the MSG_WAITALL flag is set (for non-atomic socket), 778 * we must not quit until "uio->uio_resid == 0" or an error 779 * termination. If a signal/timeout occurs, return 780 * with a short count but without error. 781 * Keep sockbuf locked against other readers. 
782 */ 783 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 784 !sosendallatonce(so) && !nextrecord) { 785 if (so->so_error || so->so_state & SS_CANTRCVMORE) 786 break; 787 error = sbwait(&so->so_rcv); 788 if (error) { 789 sbunlock(&so->so_rcv); 790 splx(s); 791 return (0); 792 } 793 m = so->so_rcv.sb_mb; 794 if (m) 795 nextrecord = m->m_nextpkt; 796 } 797 } 798 799 if (m && pr->pr_flags & PR_ATOMIC) { 800 flags |= MSG_TRUNC; 801 if ((flags & MSG_PEEK) == 0) 802 (void) sbdroprecord(&so->so_rcv); 803 } 804 if ((flags & MSG_PEEK) == 0) { 805 if (m == 0) 806 so->so_rcv.sb_mb = nextrecord; 807 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 808 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 809 } 810 if (orig_resid == uio->uio_resid && orig_resid && 811 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 812 sbunlock(&so->so_rcv); 813 splx(s); 814 goto restart; 815 } 816 817 if (flagsp) 818 *flagsp |= flags; 819release: 820 sbunlock(&so->so_rcv); 821 splx(s); 822 return (error); 823} 824 825int 826soshutdown(so, how) 827 register struct socket *so; 828 register int how; 829{ 830 register struct protosw *pr = so->so_proto; 831 832 how++; 833 if (how & FREAD) 834 sorflush(so); 835 if (how & FWRITE) 836 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 837 return (0); 838} 839 840void 841sorflush(so) 842 register struct socket *so; 843{ 844 register struct sockbuf *sb = &so->so_rcv; 845 register struct protosw *pr = so->so_proto; 846 register int s; 847 struct sockbuf asb; 848 849 sb->sb_flags |= SB_NOINTR; 850 (void) sblock(sb, M_WAITOK); 851 s = splimp(); 852 socantrcvmore(so); 853 sbunlock(sb); 854 asb = *sb; 855 bzero((caddr_t)sb, sizeof (*sb)); 856 splx(s); 857 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 858 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 859 sbrelease(&asb); 860} 861 862int 863sosetopt(so, level, optname, m0, p) 864 register struct socket *so; 865 int level, optname; 866 struct mbuf *m0; 867 struct proc *p; 868{ 869 int error 
= 0; 870 register struct mbuf *m = m0; 871 872 if (level != SOL_SOCKET) { 873 if (so->so_proto && so->so_proto->pr_ctloutput) 874 return ((*so->so_proto->pr_ctloutput) 875 (PRCO_SETOPT, so, level, optname, &m0, p)); 876 error = ENOPROTOOPT; 877 } else { 878 switch (optname) { 879 880 case SO_LINGER: 881 if (m == NULL || m->m_len != sizeof (struct linger)) { 882 error = EINVAL; 883 goto bad; 884 } 885 so->so_linger = mtod(m, struct linger *)->l_linger; 886 /* fall thru... */ 887 888 case SO_DEBUG: 889 case SO_KEEPALIVE: 890 case SO_DONTROUTE: 891 case SO_USELOOPBACK: 892 case SO_BROADCAST: 893 case SO_REUSEADDR: 894 case SO_REUSEPORT: 895 case SO_OOBINLINE: 896 case SO_TIMESTAMP: 897 if (m == NULL || m->m_len < sizeof (int)) { 898 error = EINVAL; 899 goto bad; 900 } 901 if (*mtod(m, int *)) 902 so->so_options |= optname; 903 else 904 so->so_options &= ~optname; 905 break; 906 907 case SO_SNDBUF: 908 case SO_RCVBUF: 909 case SO_SNDLOWAT: 910 case SO_RCVLOWAT: 911 { 912 int optval; 913 914 if (m == NULL || m->m_len < sizeof (int)) { 915 error = EINVAL; 916 goto bad; 917 } 918 919 /* 920 * Values < 1 make no sense for any of these 921 * options, so disallow them. 922 */ 923 optval = *mtod(m, int *); 924 if (optval < 1) { 925 error = EINVAL; 926 goto bad; 927 } 928 929 switch (optname) { 930 931 case SO_SNDBUF: 932 case SO_RCVBUF: 933 if (sbreserve(optname == SO_SNDBUF ? 934 &so->so_snd : &so->so_rcv, 935 (u_long) optval) == 0) { 936 error = ENOBUFS; 937 goto bad; 938 } 939 break; 940 941 /* 942 * Make sure the low-water is never greater than 943 * the high-water. 944 */ 945 case SO_SNDLOWAT: 946 so->so_snd.sb_lowat = 947 (optval > so->so_snd.sb_hiwat) ? 948 so->so_snd.sb_hiwat : optval; 949 break; 950 case SO_RCVLOWAT: 951 so->so_rcv.sb_lowat = 952 (optval > so->so_rcv.sb_hiwat) ? 
953 so->so_rcv.sb_hiwat : optval; 954 break; 955 } 956 break; 957 } 958 959 case SO_SNDTIMEO: 960 case SO_RCVTIMEO: 961 { 962 struct timeval *tv; 963 short val; 964 965 if (m == NULL || m->m_len < sizeof (*tv)) { 966 error = EINVAL; 967 goto bad; 968 } 969 tv = mtod(m, struct timeval *); 970 if (tv->tv_sec > SHRT_MAX / hz - hz) { 971 error = EDOM; 972 goto bad; 973 } 974 val = tv->tv_sec * hz + tv->tv_usec / tick; 975 976 switch (optname) { 977 978 case SO_SNDTIMEO: 979 so->so_snd.sb_timeo = val; 980 break; 981 case SO_RCVTIMEO: 982 so->so_rcv.sb_timeo = val; 983 break; 984 } 985 break; 986 } 987 988 default: 989 error = ENOPROTOOPT; 990 break; 991 } 992 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 993 (void) ((*so->so_proto->pr_ctloutput) 994 (PRCO_SETOPT, so, level, optname, &m0, p)); 995 m = NULL; /* freed by protocol */ 996 } 997 } 998bad: 999 if (m) 1000 (void) m_free(m); 1001 return (error); 1002} 1003 1004int 1005sogetopt(so, level, optname, mp, p) 1006 register struct socket *so; 1007 int level, optname; 1008 struct mbuf **mp; 1009 struct proc *p; 1010{ 1011 register struct mbuf *m; 1012 1013 if (level != SOL_SOCKET) { 1014 if (so->so_proto && so->so_proto->pr_ctloutput) { 1015 return ((*so->so_proto->pr_ctloutput) 1016 (PRCO_GETOPT, so, level, optname, mp, p)); 1017 } else 1018 return (ENOPROTOOPT); 1019 } else { 1020 m = m_get(M_WAIT, MT_SOOPTS); 1021 m->m_len = sizeof (int); 1022 1023 switch (optname) { 1024 1025 case SO_LINGER: 1026 m->m_len = sizeof (struct linger); 1027 mtod(m, struct linger *)->l_onoff = 1028 so->so_options & SO_LINGER; 1029 mtod(m, struct linger *)->l_linger = so->so_linger; 1030 break; 1031 1032 case SO_USELOOPBACK: 1033 case SO_DONTROUTE: 1034 case SO_DEBUG: 1035 case SO_KEEPALIVE: 1036 case SO_REUSEADDR: 1037 case SO_REUSEPORT: 1038 case SO_BROADCAST: 1039 case SO_OOBINLINE: 1040 case SO_TIMESTAMP: 1041 *mtod(m, int *) = so->so_options & optname; 1042 break; 1043 1044 case SO_TYPE: 1045 *mtod(m, int *) = 
so->so_type; 1046 break; 1047 1048 case SO_ERROR: 1049 *mtod(m, int *) = so->so_error; 1050 so->so_error = 0; 1051 break; 1052 1053 case SO_SNDBUF: 1054 *mtod(m, int *) = so->so_snd.sb_hiwat; 1055 break; 1056 1057 case SO_RCVBUF: 1058 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1059 break; 1060 1061 case SO_SNDLOWAT: 1062 *mtod(m, int *) = so->so_snd.sb_lowat; 1063 break; 1064 1065 case SO_RCVLOWAT: 1066 *mtod(m, int *) = so->so_rcv.sb_lowat; 1067 break; 1068 1069 case SO_SNDTIMEO: 1070 case SO_RCVTIMEO: 1071 { 1072 int val = (optname == SO_SNDTIMEO ? 1073 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1074 1075 m->m_len = sizeof(struct timeval); 1076 mtod(m, struct timeval *)->tv_sec = val / hz; 1077 mtod(m, struct timeval *)->tv_usec = 1078 (val % hz) * tick; 1079 break; 1080 } 1081 1082 default: 1083 (void)m_free(m); 1084 return (ENOPROTOOPT); 1085 } 1086 *mp = m; 1087 return (0); 1088 } 1089} 1090 1091void 1092sohasoutofband(so) 1093 register struct socket *so; 1094{ 1095 struct proc *p; 1096 1097 if (so->so_pgid < 0) 1098 gsignal(-so->so_pgid, SIGURG); 1099 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1100 psignal(p, SIGURG); 1101 selwakeup(&so->so_rcv.sb_sel); 1102} 1103 1104int 1105sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1106{ 1107 int revents = 0; 1108 int s = splnet(); 1109 1110 if (events & (POLLIN | POLLRDNORM)) 1111 if (soreadable(so)) 1112 revents |= events & (POLLIN | POLLRDNORM); 1113 1114 if (events & (POLLOUT | POLLWRNORM)) 1115 if (sowriteable(so)) 1116 revents |= events & (POLLOUT | POLLWRNORM); 1117 1118 if (events & (POLLPRI | POLLRDBAND)) 1119 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1120 revents |= events & (POLLPRI | POLLRDBAND); 1121 1122 if (revents == 0) { 1123 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1124 selrecord(p, &so->so_rcv.sb_sel); 1125 so->so_rcv.sb_flags |= SB_SEL; 1126 } 1127 1128 if (events & (POLLOUT | POLLWRNORM)) { 1129 selrecord(p, &so->so_snd.sb_sel); 
1130 so->so_snd.sb_flags |= SB_SEL; 1131 } 1132 } 1133 1134 splx(s); 1135 return (revents); 1136} 1137