uipc_socket.c revision 33628
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.36 1998/02/06 12:13:28 eivind Exp $ 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/proc.h> 40#include <sys/fcntl.h> 41#include <sys/malloc.h> 42#include <sys/mbuf.h> 43#include <sys/domain.h> 44#include <sys/kernel.h> 45#include <sys/poll.h> 46#include <sys/protosw.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/resourcevar.h> 50#include <sys/signalvar.h> 51#include <sys/sysctl.h> 52 53#include <machine/limits.h> 54 55MALLOC_DEFINE(M_SOCKET, "socket", "socket structure"); 56MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 57MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 58 59static int somaxconn = SOMAXCONN; 60SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 61 0, ""); 62 63/* 64 * Socket operation routines. 65 * These routines are called by the routines in 66 * sys_socket.c or from a system process, and 67 * implement the semantics of socket operations by 68 * switching out to the protocol specific routines. 
 */
/*
 * Create a socket in domain "dom" of the given type.  A non-zero
 * "proto" selects that specific protocol; otherwise the first
 * protocol of the requested type in the domain is used.  On success
 * the newly attached socket is returned through "aso"; on failure
 * an errno value is returned and no socket is allocated.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	/* No matching protocol, or one that cannot attach a socket. */
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Mark the socket as having no file-descriptor
		 * reference so that sofree() will actually release it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * Bind the socket to the address "nam" via the protocol's bind
 * entry point.  Runs at splnet for the duration of the call.
 */
int
sobind(so, nam, p)
	struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	splx(s);
	return (error);
}

/*
 * Mark the socket as accepting connections and set its backlog.
 * SO_ACCEPTCONN is only set if no completed connections are already
 * queued; a backlog outside [0, somaxconn] is clamped to somaxconn.
 */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s = splnet(), error;

	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Free a socket that has no remaining protocol state and no file
 * descriptor reference.  If it is still queued on a listening
 * socket's incomplete or complete connection queue, unlink it
 * first; a queued socket in neither state is a bug (panic).
 * Does nothing unless both so_pcb is gone and SS_NOFDREF is set.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			TAILQ_REMOVE(&head->so_comp, so, so_list);
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}
	/* Release both buffers; sorflush() disposes of the receive side. */
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/*
		 * Abort every pending connection still parked on the
		 * incomplete and complete queues.  sonext is captured
		 * before soabort() since the abort unlinks sp.
		 */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket already disconnecting: don't wait. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Linger: sleep until the disconnect completes,
			 * the linger interval expires, or a signal
			 * interrupts us.  NOTE(review): so_linger == 0
			 * means tsleep() gets no timeout here — confirm
			 * callers never set SO_LINGER with a zero
			 * interval expecting an immediate close.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach regardless; keep the first error we saw. */
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
230 */ 231int 232soabort(so) 233 struct socket *so; 234{ 235 236 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 237} 238 239int 240soaccept(so, nam) 241 register struct socket *so; 242 struct sockaddr **nam; 243{ 244 int s = splnet(); 245 int error; 246 247 if ((so->so_state & SS_NOFDREF) == 0) 248 panic("soaccept: !NOFDREF"); 249 so->so_state &= ~SS_NOFDREF; 250 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 251 splx(s); 252 return (error); 253} 254 255int 256soconnect(so, nam, p) 257 register struct socket *so; 258 struct sockaddr *nam; 259 struct proc *p; 260{ 261 int s; 262 int error; 263 264 if (so->so_options & SO_ACCEPTCONN) 265 return (EOPNOTSUPP); 266 s = splnet(); 267 /* 268 * If protocol is connection-based, can only connect once. 269 * Otherwise, if connected, try to disconnect first. 270 * This allows user to disconnect by connecting to, e.g., 271 * a null address. 272 */ 273 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 274 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 275 (error = sodisconnect(so)))) 276 error = EISCONN; 277 else 278 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 279 splx(s); 280 return (error); 281} 282 283int 284soconnect2(so1, so2) 285 register struct socket *so1; 286 struct socket *so2; 287{ 288 int s = splnet(); 289 int error; 290 291 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 292 splx(s); 293 return (error); 294} 295 296int 297sodisconnect(so) 298 register struct socket *so; 299{ 300 int s = splnet(); 301 int error; 302 303 if ((so->so_state & SS_ISCONNECTED) == 0) { 304 error = ENOTCONN; 305 goto bad; 306 } 307 if (so->so_state & SS_ISDISCONNECTING) { 308 error = EALREADY; 309 goto bad; 310 } 311 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 312bad: 313 splx(s); 314 return (error); 315} 316 317#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 318/* 319 * Send on a socket. 
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, p)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct proc *p;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}

	/* Route-bypass only applies to protocols that send atomically. */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit taken while holding the send-buffer lock at splnet. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	/* Serialize against other senders on this socket. */
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/* Allow a little extra room for out-of-band data. */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		/*
		 * Not enough room: fail a non-blocking socket,
		 * otherwise release the lock and wait for space.
		 */
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/*
			 * Copy the user's data into a fresh mbuf chain,
			 * using a packet header on the first mbuf and a
			 * cluster whenever at least MINCLSIZE remains.
			 */
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF : 0,
			top, addr, control, p);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /*
		     * The protocol now owns top and control; start a
		     * fresh chain for any remaining data.
		     */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * Out-of-band data is fetched straight from the protocol into
	 * a private mbuf and copied out; the sockbuf is not involved.
	 */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	/* Serialize against other receivers on this socket. */
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB mark lets us return early. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* First mbuf of the record carries the sender's address, if any. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Then any ancillary (control) mbufs, e.g. passed descriptors. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop over the data mbufs of the record. */
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data in one call. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Do not read past the out-of-band mark in one chunk. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf; advance to the next. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: trim the consumed prefix in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, deliberately without error. */
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	/* For atomic protocols, drop any unread remainder of the record. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/* Nothing was transferred and the socket is still open: retry. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection.  "how" is the
 * pre-4.4BSD 0..2 value; incrementing it maps it onto the
 * FREAD/FWRITE flag bits tested below.  Note that "how" is not
 * range-checked here.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Flush the receive side of a socket: mark it unable to receive
 * more data, snapshot the receive buffer, zero it under splimp so
 * interrupt-level appenders see an empty buffer, then dispose of
 * the snapshot (externalized rights first, if the domain needs it).
 * SB_NOINTR makes the sblock() uninterruptible.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Set a socket option.  Non-SOL_SOCKET levels are passed straight
 * to the protocol's ctloutput routine.  The option value arrives in
 * mbuf m0, which is consumed here (or by the protocol) in all cases.
 */
int
sosetopt(so, level, optname, m0, p)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
	struct proc *p;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0, p));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... (l_onoff handled as a boolean below) */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			/* Boolean options map directly onto so_options bits. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Reject timeouts that overflow the short tick count. */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a chance to act on the option too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0, p));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Get a socket option.  Non-SOL_SOCKET levels go to the protocol's
 * ctloutput routine.  On success a freshly allocated mbuf holding
 * the value is returned through *mp; the caller owns it.
 */
int
sogetopt(so, level, optname, mp, p)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
	struct proc *p;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp, p));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading SO_ERROR clears the pending error. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the stored tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Notify the owning process or process group that out-of-band data
 * has arrived (SIGURG), and wake anyone selecting on the receive
 * buffer.  Negative so_pgid names a process group, positive a pid.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * Poll a socket: report readability, writability, and pending
 * out-of-band condition.  If no requested event is ready, record
 * the selecting process on the matching sockbuf(s) so it can be
 * woken later.
 */
int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(p, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}