uipc_socket.c revision 33134
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.35 1998/02/04 22:32:37 eivind Exp $ 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/proc.h> 40#include <sys/fcntl.h> 41#include <sys/malloc.h> 42#include <sys/mbuf.h> 43#include <sys/domain.h> 44#include <sys/kernel.h> 45#include <sys/poll.h> 46#include <sys/protosw.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/resourcevar.h> 50#include <sys/signalvar.h> 51#include <sys/sysctl.h> 52 53#include <machine/limits.h> 54 55MALLOC_DEFINE(M_SOCKET, "socket", "socket structure"); 56MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 57MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 58 59static int somaxconn = SOMAXCONN; 60SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 61 0, ""); 62 63/* 64 * Socket operation routines. 65 * These routines are called by the routines in 66 * sys_socket.c or from a system process, and 67 * implement the semantics of socket operations by 68 * switching out to the protocol specific routines. 
 */
/*ARGSUSED*/
/*
 * Create a socket of the given type in domain "dom", using protocol
 * "proto" (0 selects the domain's default protocol for the type).
 * On success the new socket is returned through *aso and 0 is returned;
 * on failure an errno value is returned and nothing is allocated.
 */
int
socreate(dom, aso, type, proto, p)
        int dom;
        struct socket **aso;
        register int type;
        int proto;
        struct proc *p;
{
        register struct protosw *prp;
        register struct socket *so;
        register int error;

        /* Locate the protocol switch entry for this request. */
        if (proto)
                prp = pffindproto(dom, proto, type);
        else
                prp = pffindtype(dom, type);
        if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
                return (EPROTONOSUPPORT);
        if (prp->pr_type != type)
                return (EPROTOTYPE);
        MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
        bzero((caddr_t)so, sizeof(*so));
        TAILQ_INIT(&so->so_incomp);
        TAILQ_INIT(&so->so_comp);
        so->so_type = type;
        so->so_proto = prp;
        /* Let the protocol attach its control block. */
        error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
        if (error) {
                /*
                 * SS_NOFDREF marks the socket as having no file-table
                 * reference, which lets sofree() actually free it.
                 */
                so->so_state |= SS_NOFDREF;
                sofree(so);
                return (error);
        }
        *aso = so;
        return (0);
}

/*
 * Bind the socket to the supplied address; the protocol-specific
 * pru_bind routine does the real work.  Runs at splnet.
 * Returns 0 or an errno value from the protocol.
 */
int
sobind(so, nam, p)
        struct socket *so;
        struct sockaddr *nam;
        struct proc *p;
{
        int s = splnet();
        int error;

        error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
        splx(s);
        return (error);
}

/*
 * Prepare the socket to accept connections, with a pending-connection
 * limit of "backlog" (clamped to the kern.ipc.somaxconn sysctl value).
 * Returns 0 or an errno value from the protocol's pru_listen.
 */
int
solisten(so, backlog, p)
        register struct socket *so;
        int backlog;
        struct proc *p;
{
        int s = splnet(), error;

        error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
        if (error) {
                splx(s);
                return (error);
        }
        /* Only mark as accepting if no completed connections are queued. */
        if (so->so_comp.tqh_first == NULL)
                so->so_options |= SO_ACCEPTCONN;
        if (backlog < 0 || backlog > somaxconn)
                backlog = somaxconn;
        so->so_qlimit = backlog;
        splx(s);
        return (0);
}

/*
 * Free a socket, but only once both the protocol has detached
 * (so_pcb == 0) and no file descriptor references remain (SS_NOFDREF
 * set); otherwise this is a no-op.  If the socket is still on a
 * listening socket's incomplete or completed connection queue it is
 * unlinked first.
 */
void
sofree(so)
        register struct socket *so;
{
        struct socket *head = so->so_head;

        if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
                return;
        if (head != NULL) {
                /* Unlink from the accepting socket's queues. */
                if (so->so_state & SS_INCOMP) {
                        TAILQ_REMOVE(&head->so_incomp, so, so_list);
                        head->so_incqlen--;
                } else if (so->so_state & SS_COMP) {
                        TAILQ_REMOVE(&head->so_comp, so, so_list);
                } else {
                        panic("sofree: not queued");
                }
                head->so_qlen--;
                so->so_state &= ~(SS_INCOMP|SS_COMP);
                so->so_head = NULL;
        }
        sbrelease(&so->so_snd);
        sorflush(so);
        FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
        register struct socket *so;
{
        int s = splnet();               /* conservative */
        int error = 0;

        if (so->so_options & SO_ACCEPTCONN) {
                struct socket *sp, *sonext;

                /*
                 * Abort every connection still sitting on the incomplete
                 * and completed queues; fetch the successor first since
                 * soabort() can tear down the current entry.
                 */
                for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
                        sonext = sp->so_list.tqe_next;
                        (void) soabort(sp);
                }
                for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
                        sonext = sp->so_list.tqe_next;
                        (void) soabort(sp);
                }
        }
        if (so->so_pcb == 0)
                goto discard;
        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
                        error = sodisconnect(so);
                        if (error)
                                goto drop;
                }
                if (so->so_options & SO_LINGER) {
                        /*
                         * SO_LINGER: wait (up to so_linger, enforced by
                         * tsleep's timeout) for the disconnect to finish,
                         * unless the socket is non-blocking.
                         */
                        if ((so->so_state & SS_ISDISCONNECTING) &&
                            (so->so_state & SS_NBIO))
                                goto drop;
                        while (so->so_state & SS_ISCONNECTED) {
                                error = tsleep((caddr_t)&so->so_timeo,
                                    PSOCK | PCATCH, "soclos", so->so_linger);
                                if (error)
                                        break;
                        }
                }
        }
drop:
        if (so->so_pcb) {
                int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
                /* Preserve the first error seen. */
                if (error == 0)
                        error = error2;
        }
discard:
        if (so->so_state & SS_NOFDREF)
                panic("soclose: NOFDREF");
        so->so_state |= SS_NOFDREF;
        sofree(so);
        splx(s);
        return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
        struct socket *so;
{

        return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

/*
 * Accept a connection: clear SS_NOFDREF (the caller is installing a
 * file reference) and ask the protocol for the peer's address.
 */
int
soaccept(so, nam)
        register struct socket *so;
        struct sockaddr **nam;
{
        int s = splnet();
        int error;

        if ((so->so_state & SS_NOFDREF) == 0)
                panic("soaccept: !NOFDREF");
        so->so_state &= ~SS_NOFDREF;
        error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
        splx(s);
        return (error);
}

/*
 * Initiate a connection to the given address via the protocol's
 * pru_connect.  Not permitted on a listening socket.
 */
int
soconnect(so, nam, p)
        register struct socket *so;
        struct sockaddr *nam;
        struct proc *p;
{
        int s;
        int error;

        if (so->so_options & SO_ACCEPTCONN)
                return (EOPNOTSUPP);
        s = splnet();
        /*
         * If protocol is connection-based, can only connect once.
         * Otherwise, if connected, try to disconnect first.
         * This allows user to disconnect by connecting to, e.g.,
         * a null address.
         */
        if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
            ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
            (error = sodisconnect(so))))
                error = EISCONN;
        else
                error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
        splx(s);
        return (error);
}

/*
 * Connect two sockets to each other (socketpair-style), via the
 * protocol's pru_connect2.
 */
int
soconnect2(so1, so2)
        register struct socket *so1;
        struct socket *so2;
{
        int s = splnet();
        int error;

        error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
        splx(s);
        return (error);
}

/*
 * Begin disconnecting a connected socket.  Fails with ENOTCONN if not
 * connected and EALREADY if a disconnect is already in progress.
 */
int
sodisconnect(so)
        register struct socket *so;
{
        int s = splnet();
        int error;

        if ((so->so_state & SS_ISCONNECTED) == 0) {
                error = ENOTCONN;
                goto bad;
        }
        if (so->so_state & SS_ISDISCONNECTING) {
                error = EALREADY;
                goto bad;
        }
        error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
        splx(s);
        return (error);
}

/* Sockbuf lock acquisition mode: don't sleep if MSG_DONTWAIT was given. */
#define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, p)
        register struct socket *so;
        struct sockaddr *addr;
        struct uio *uio;
        struct mbuf *top;
        struct mbuf *control;
        int flags;
        struct proc *p;
{
        struct mbuf **mp;
        register struct mbuf *m;
        register long space, len, resid;
        int clen = 0, error, s, dontroute, mlen;
        /* atomic: each record must be handed to the protocol in one piece. */
        int atomic = sosendallatonce(so) || top;

        if (uio)
                resid = uio->uio_resid;
        else
                resid = top->m_pkthdr.len;
        /*
         * In theory resid should be unsigned.
         * However, space must be signed, as it might be less than 0
         * if we over-committed, and we must use a signed comparison
         * of space and resid.  On the other hand, a negative resid
         * causes us to loop sending 0-length segments to the protocol.
         *
         * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
         * type sockets since that's an error.
         */
        if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
                error = EINVAL;
                goto out;
        }

        /*
         * MSG_DONTROUTE is honored only for atomic protocols, by
         * temporarily setting SO_DONTROUTE around the pru_send below.
         */
        dontroute =
            (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
            (so->so_proto->pr_flags & PR_ATOMIC);
        if (p)
                p->p_stats->p_ru.ru_msgsnd++;
        if (control)
                clen = control->m_len;
/* Note: snderr() releases the spl before jumping to the unlock path. */
#define snderr(errno)   { error = errno; splx(s); goto release; }

restart:
        error = sblock(&so->so_snd, SBLOCKWAIT(flags));
        if (error)
                goto out;
        do {
                s = splnet();
                if (so->so_state & SS_CANTSENDMORE)
                        snderr(EPIPE);
                if (so->so_error)
                        snderr(so->so_error);
                if ((so->so_state & SS_ISCONNECTED) == 0) {
                        /*
                         * `sendto' and `sendmsg' is allowed on a connection-
                         * based socket if it supports implied connect.
                         * Return ENOTCONN if not connected and no address is
                         * supplied.
                         */
                        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
                            (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
                                if ((so->so_state & SS_ISCONFIRMING) == 0 &&
                                    !(resid == 0 && clen != 0))
                                        snderr(ENOTCONN);
                        } else if (addr == 0)
                                snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
                                    ENOTCONN : EDESTADDRREQ);
                }
                space = sbspace(&so->so_snd);
                /* Out-of-band data gets a little extra headroom. */
                if (flags & MSG_OOB)
                        space += 1024;
                if ((atomic && resid > so->so_snd.sb_hiwat) ||
                    clen > so->so_snd.sb_hiwat)
                        snderr(EMSGSIZE);
                /* Not enough room: block (releasing the sockbuf lock) or fail. */
                if (space < resid + clen && uio &&
                    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
                        if (so->so_state & SS_NBIO)
                                snderr(EWOULDBLOCK);
                        sbunlock(&so->so_snd);
                        error = sbwait(&so->so_snd);
                        splx(s);
                        if (error)
                                goto out;
                        goto restart;
                }
                splx(s);
                mp = &top;
                space -= clen;
                do {
                        if (uio == NULL) {
                                /*
                                 * Data is prepackaged in "top".
                                 */
                                resid = 0;
                                if (flags & MSG_EOR)
                                        top->m_flags |= M_EOR;
                        } else do {
                                /* Copy user data into a fresh mbuf chain. */
                                if (top == 0) {
                                        MGETHDR(m, M_WAIT, MT_DATA);
                                        mlen = MHLEN;
                                        m->m_pkthdr.len = 0;
                                        m->m_pkthdr.rcvif = (struct ifnet *)0;
                                } else {
                                        MGET(m, M_WAIT, MT_DATA);
                                        mlen = MLEN;
                                }
                                if (resid >= MINCLSIZE) {
                                        MCLGET(m, M_WAIT);
                                        if ((m->m_flags & M_EXT) == 0)
                                                goto nopages;
                                        mlen = MCLBYTES;
                                        len = min(min(mlen, resid), space);
                                } else {
nopages:
                                        len = min(min(mlen, resid), space);
                                        /*
                                         * For datagram protocols, leave room
                                         * for protocol headers in first mbuf.
                                         */
                                        if (atomic && top == 0 && len < mlen)
                                                MH_ALIGN(m, len);
                                }
                                space -= len;
                                error = uiomove(mtod(m, caddr_t), (int)len, uio);
                                resid = uio->uio_resid;
                                m->m_len = len;
                                *mp = m;
                                top->m_pkthdr.len += len;
                                if (error)
                                        goto release;
                                mp = &m->m_next;
                                if (resid <= 0) {
                                        if (flags & MSG_EOR)
                                                top->m_flags |= M_EOR;
                                        break;
                                }
                        } while (space > 0 && atomic);
                        if (dontroute)
                                so->so_options |= SO_DONTROUTE;
                        s = splnet();                           /* XXX */
                        error = (*so->so_proto->pr_usrreqs->pru_send)(so,
                            (flags & MSG_OOB) ? PRUS_OOB :
                        /*
                         * If the user set MSG_EOF, the protocol
                         * understands this flag and nothing left to
                         * send then use PRU_SEND_EOF instead of PRU_SEND.
                         */
                            ((flags & MSG_EOF) &&
                             (so->so_proto->pr_flags & PR_IMPLOPCL) &&
                             (resid <= 0)) ?
                                PRUS_EOF : 0,
                            top, addr, control, p);
                        splx(s);
                        if (dontroute)
                                so->so_options &= ~SO_DONTROUTE;
                        /* The protocol consumed top and control above. */
                        clen = 0;
                        control = 0;
                        top = 0;
                        mp = &top;
                        if (error)
                                goto release;
                } while (resid && space > 0);
        } while (resid);

release:
        sbunlock(&so->so_snd);
out:
        if (top)
                m_freem(top);
        if (control)
                m_freem(control);
        return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
        register struct socket *so;
        struct sockaddr **psa;
        struct uio *uio;
        struct mbuf **mp0;
        struct mbuf **controlp;
        int *flagsp;
{
        register struct mbuf *m, **mp;
        register int flags, len, error, s, offset;
        struct protosw *pr = so->so_proto;
        struct mbuf *nextrecord;
        int moff, type = 0;
        /* Used below to detect that nothing was transferred and retry. */
        int orig_resid = uio->uio_resid;

        mp = mp0;
        if (psa)
                *psa = 0;
        if (controlp)
                *controlp = 0;
        if (flagsp)
                flags = *flagsp &~ MSG_EOR;
        else
                flags = 0;
        if (flags & MSG_OOB) {
                /* Out-of-band data is fetched from the protocol directly. */
                m = m_get(M_WAIT, MT_DATA);
                error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
                if (error)
                        goto bad;
                do {
                        error = uiomove(mtod(m, caddr_t),
                            (int) min(uio->uio_resid, m->m_len), uio);
                        m = m_free(m);
                } while (uio->uio_resid && error == 0 && m);
bad:
                if (m)
                        m_freem(m);
                return (error);
        }
        if (mp)
                *mp = (struct mbuf *)0;
        if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
                (*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
        error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
        if (error)
                return (error);
        s = splnet();

        m = so->so_rcv.sb_mb;
        /*
         * If we have less data than requested, block awaiting more
         * (subject to any timeout) if:
         *   1. the current count is less than the low water mark, or
         *   2. MSG_WAITALL is set, and it is possible to do the entire
         *      receive operation at once if we block (resid <= hiwat).
         *   3. MSG_DONTWAIT is not set
         * If MSG_WAITALL is set but resid is larger than the receive buffer,
         * we have to do the receive in sections, and thus risk returning
         * a short count if a timeout or signal occurs after we start.
         */
        if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
            so->so_rcv.sb_cc < uio->uio_resid) &&
            (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
            ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
            m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
                if (m == 0 && so->so_rcv.sb_cc)
                        panic("receive 1");
#endif
                if (so->so_error) {
                        if (m)
                                goto dontblock;
                        error = so->so_error;
                        if ((flags & MSG_PEEK) == 0)
                                so->so_error = 0;
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
                        if (m)
                                goto dontblock;
                        else
                                goto release;
                }
                /* A record boundary or OOB data is deliverable now. */
                for (; m; m = m->m_next)
                        if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
                                m = so->so_rcv.sb_mb;
                                goto dontblock;
                        }
                if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
                    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
                        error = ENOTCONN;
                        goto release;
                }
                if (uio->uio_resid == 0)
                        goto release;
                if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
                        error = EWOULDBLOCK;
                        goto release;
                }
                sbunlock(&so->so_rcv);
                error = sbwait(&so->so_rcv);
                splx(s);
                if (error)
                        return (error);
                goto restart;
        }
dontblock:
        if (uio->uio_procp)
                uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
        nextrecord = m->m_nextpkt;
        if (pr->pr_flags & PR_ADDR) {
                /* The record begins with the sender's address. */
#ifdef DIAGNOSTIC
                if (m->m_type != MT_SONAME)
                        panic("receive 1a");
#endif
                orig_resid = 0;
                if (psa)
                        *psa = dup_sockaddr(mtod(m, struct sockaddr *),
                            mp0 == 0);
                if (flags & MSG_PEEK) {
                        m = m->m_next;
                } else {
                        sbfree(&so->so_rcv, m);
                        MFREE(m, so->so_rcv.sb_mb);
                        m = so->so_rcv.sb_mb;
                }
        }
        /* Next come any ancillary-data (control) mbufs. */
        while (m && m->m_type == MT_CONTROL && error == 0) {
                if (flags & MSG_PEEK) {
                        if (controlp)
                                *controlp = m_copy(m, 0, m->m_len);
                        m = m->m_next;
                } else {
                        sbfree(&so->so_rcv, m);
                        if (controlp) {
                                /* Passed file descriptors must be externalized. */
                                if (pr->pr_domain->dom_externalize &&
                                    mtod(m, struct cmsghdr *)->cmsg_type ==
                                    SCM_RIGHTS)
                                        error = (*pr->pr_domain->dom_externalize)(m);
                                *controlp = m;
                                so->so_rcv.sb_mb = m->m_next;
                                m->m_next = 0;
                                m = so->so_rcv.sb_mb;
                        } else {
                                MFREE(m, so->so_rcv.sb_mb);
                                m = so->so_rcv.sb_mb;
                        }
                }
                if (controlp) {
                        orig_resid = 0;
                        controlp = &(*controlp)->m_next;
                }
        }
        if (m) {
                if ((flags & MSG_PEEK) == 0)
                        m->m_nextpkt = nextrecord;
                type = m->m_type;
                if (type == MT_OOBDATA)
                        flags |= MSG_OOB;
        }
        /* moff: consumed bytes within the current mbuf when peeking. */
        moff = 0;
        offset = 0;
        while (m && uio->uio_resid > 0 && error == 0) {
                /* Don't mix OOB and normal data within one receive. */
                if (m->m_type == MT_OOBDATA) {
                        if (type != MT_OOBDATA)
                                break;
                } else if (type == MT_OOBDATA)
                        break;
#ifdef DIAGNOSTIC
                else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
                        panic("receive 3");
#endif
                so->so_state &= ~SS_RCVATMARK;
                len = uio->uio_resid;
                /* Don't read past the out-of-band mark. */
                if (so->so_oobmark && len > so->so_oobmark - offset)
                        len = so->so_oobmark - offset;
                if (len > m->m_len - moff)
                        len = m->m_len - moff;
                /*
                 * If mp is set, just pass back the mbufs.
                 * Otherwise copy them out via the uio, then free.
                 * Sockbuf must be consistent here (points to current mbuf,
                 * it points to next record) when we drop priority;
                 * we must note any additions to the sockbuf when we
                 * block interrupts again.
                 */
                if (mp == 0) {
                        splx(s);
                        error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
                        s = splnet();
                        if (error)
                                goto release;
                } else
                        uio->uio_resid -= len;
                if (len == m->m_len - moff) {
                        /* Entire mbuf consumed: advance (or unlink) it. */
                        if (m->m_flags & M_EOR)
                                flags |= MSG_EOR;
                        if (flags & MSG_PEEK) {
                                m = m->m_next;
                                moff = 0;
                        } else {
                                nextrecord = m->m_nextpkt;
                                sbfree(&so->so_rcv, m);
                                if (mp) {
                                        *mp = m;
                                        mp = &m->m_next;
                                        so->so_rcv.sb_mb = m = m->m_next;
                                        *mp = (struct mbuf *)0;
                                } else {
                                        MFREE(m, so->so_rcv.sb_mb);
                                        m = so->so_rcv.sb_mb;
                                }
                                if (m)
                                        m->m_nextpkt = nextrecord;
                        }
                } else {
                        /* Partial mbuf: trim the consumed prefix in place. */
                        if (flags & MSG_PEEK)
                                moff += len;
                        else {
                                if (mp)
                                        *mp = m_copym(m, 0, len, M_WAIT);
                                m->m_data += len;
                                m->m_len -= len;
                                so->so_rcv.sb_cc -= len;
                        }
                }
                if (so->so_oobmark) {
                        if ((flags & MSG_PEEK) == 0) {
                                so->so_oobmark -= len;
                                if (so->so_oobmark == 0) {
                                        so->so_state |= SS_RCVATMARK;
                                        break;
                                }
                        } else {
                                offset += len;
                                if (offset == so->so_oobmark)
                                        break;
                        }
                }
                if (flags & MSG_EOR)
                        break;
                /*
                 * If the MSG_WAITALL flag is set (for non-atomic socket),
                 * we must not quit until "uio->uio_resid == 0" or an error
                 * termination.  If a signal/timeout occurs, return
                 * with a short count but without error.
                 * Keep sockbuf locked against other readers.
                 */
                while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
                    !sosendallatonce(so) && !nextrecord) {
                        if (so->so_error || so->so_state & SS_CANTRCVMORE)
                                break;
                        error = sbwait(&so->so_rcv);
                        if (error) {
                                /* Deliberate short count, not an error. */
                                sbunlock(&so->so_rcv);
                                splx(s);
                                return (0);
                        }
                        m = so->so_rcv.sb_mb;
                        if (m)
                                nextrecord = m->m_nextpkt;
                }
        }

        /* For atomic protocols, discard any unread remainder of the record. */
        if (m && pr->pr_flags & PR_ATOMIC) {
                flags |= MSG_TRUNC;
                if ((flags & MSG_PEEK) == 0)
                        (void) sbdroprecord(&so->so_rcv);
        }
        if ((flags & MSG_PEEK) == 0) {
                if (m == 0)
                        so->so_rcv.sb_mb = nextrecord;
                if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
                        (*pr->pr_usrreqs->pru_rcvd)(so, flags);
        }
        /* Nothing was transferred and nothing terminal happened: retry. */
        if (orig_resid == uio->uio_resid && orig_resid &&
            (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
                sbunlock(&so->so_rcv);
                splx(s);
                goto restart;
        }

        if (flagsp)
                *flagsp |= flags;
release:
        sbunlock(&so->so_rcv);
        splx(s);
        return (error);
}

/*
 * Shut down part of a full-duplex connection.  The increment maps the
 * 0/1/2 "how" argument onto the FREAD/FWRITE bits (presumably 1 and 2
 * from fcntl.h -- the classic BSD idiom; values not re-verified here).
 * An out-of-range "how" selects neither direction and returns 0.
 */
int
soshutdown(so, how)
        register struct socket *so;
        register int how;
{
        register struct protosw *pr = so->so_proto;

        how++;
        if (how & FREAD)
                sorflush(so);
        if (how & FWRITE)
                return ((*pr->pr_usrreqs->pru_shutdown)(so));
        return (0);
}

/*
 * Flush and dispose of the receive buffer.  The sockbuf is snapshotted
 * into a local copy and zeroed in place at splimp so that interrupt-level
 * appends see an empty buffer; the snapshot is then torn down (including
 * any in-flight passed access rights) without the lock held.
 */
void
sorflush(so)
        register struct socket *so;
{
        register struct sockbuf *sb = &so->so_rcv;
        register struct protosw *pr = so->so_proto;
        register int s;
        struct sockbuf asb;

        /* SB_NOINTR: the sblock below must not be interruptible. */
        sb->sb_flags |= SB_NOINTR;
        (void) sblock(sb, M_WAITOK);
        s = splimp();
        socantrcvmore(so);
        sbunlock(sb);
        asb = *sb;
        bzero((caddr_t)sb, sizeof (*sb));
        splx(s);
        if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
                (*pr->pr_domain->dom_dispose)(asb.sb_mb);
        sbrelease(&asb);
}

/*
 * Set a socket option.  Non-SOL_SOCKET levels are passed straight to the
 * protocol's ctloutput.  The option mbuf m0 is consumed: freed here on
 * error or when handled locally, or by the protocol when forwarded.
 */
int
sosetopt(so, level, optname, m0, p)
        register struct socket *so;
        int level, optname;
        struct mbuf *m0;
        struct proc *p;
{
        int error = 0;
        register struct mbuf *m = m0;

        if (level != SOL_SOCKET) {
                if (so->so_proto && so->so_proto->pr_ctloutput)
                        return ((*so->so_proto->pr_ctloutput)
                                  (PRCO_SETOPT, so, level, optname, &m0, p));
                error = ENOPROTOOPT;
        } else {
                switch (optname) {

                case SO_LINGER:
                        if (m == NULL || m->m_len != sizeof (struct linger)) {
                                error = EINVAL;
                                goto bad;
                        }
                        so->so_linger = mtod(m, struct linger *)->l_linger;
                        /* fall thru... (l_onoff is read as the int below) */

                case SO_DEBUG:
                case SO_KEEPALIVE:
                case SO_DONTROUTE:
                case SO_USELOOPBACK:
                case SO_BROADCAST:
                case SO_REUSEADDR:
                case SO_REUSEPORT:
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
                        if (m == NULL || m->m_len < sizeof (int)) {
                                error = EINVAL;
                                goto bad;
                        }
                        if (*mtod(m, int *))
                                so->so_options |= optname;
                        else
                                so->so_options &= ~optname;
                        break;

                case SO_SNDBUF:
                case SO_RCVBUF:
                case SO_SNDLOWAT:
                case SO_RCVLOWAT:
                    {
                        int optval;

                        if (m == NULL || m->m_len < sizeof (int)) {
                                error = EINVAL;
                                goto bad;
                        }

                        /*
                         * Values < 1 make no sense for any of these
                         * options, so disallow them.
                         */
                        optval = *mtod(m, int *);
                        if (optval < 1) {
                                error = EINVAL;
                                goto bad;
                        }

                        switch (optname) {

                        case SO_SNDBUF:
                        case SO_RCVBUF:
                                if (sbreserve(optname == SO_SNDBUF ?
                                    &so->so_snd : &so->so_rcv,
                                    (u_long) optval) == 0) {
                                        error = ENOBUFS;
                                        goto bad;
                                }
                                break;

                        /*
                         * Make sure the low-water is never greater than
                         * the high-water.
                         */
                        case SO_SNDLOWAT:
                                so->so_snd.sb_lowat =
                                    (optval > so->so_snd.sb_hiwat) ?
                                    so->so_snd.sb_hiwat : optval;
                                break;
                        case SO_RCVLOWAT:
                                so->so_rcv.sb_lowat =
                                    (optval > so->so_rcv.sb_hiwat) ?
                                    so->so_rcv.sb_hiwat : optval;
                                break;
                        }
                        break;
                    }

                case SO_SNDTIMEO:
                case SO_RCVTIMEO:
                    {
                        struct timeval *tv;
                        short val;

                        if (m == NULL || m->m_len < sizeof (*tv)) {
                                error = EINVAL;
                                goto bad;
                        }
                        tv = mtod(m, struct timeval *);
                        /* Guard the tick conversion against short overflow. */
                        if (tv->tv_sec > SHRT_MAX / hz - hz) {
                                error = EDOM;
                                goto bad;
                        }
                        val = tv->tv_sec * hz + tv->tv_usec / tick;

                        switch (optname) {

                        case SO_SNDTIMEO:
                                so->so_snd.sb_timeo = val;
                                break;
                        case SO_RCVTIMEO:
                                so->so_rcv.sb_timeo = val;
                                break;
                        }
                        break;
                    }

                default:
                        error = ENOPROTOOPT;
                        break;
                }
                /* Let the protocol see SOL_SOCKET options too. */
                if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
                        (void) ((*so->so_proto->pr_ctloutput)
                                  (PRCO_SETOPT, so, level, optname, &m0, p));
                        m = NULL;       /* freed by protocol */
                }
        }
bad:
        if (m)
                (void) m_free(m);
        return (error);
}

/*
 * Get a socket option.  Non-SOL_SOCKET levels are forwarded to the
 * protocol's ctloutput.  On success an mbuf holding the value is
 * returned through *mp; the caller owns and must free it.
 */
int
sogetopt(so, level, optname, mp, p)
        register struct socket *so;
        int level, optname;
        struct mbuf **mp;
        struct proc *p;
{
        register struct mbuf *m;

        if (level != SOL_SOCKET) {
                if (so->so_proto && so->so_proto->pr_ctloutput) {
                        return ((*so->so_proto->pr_ctloutput)
                                  (PRCO_GETOPT, so, level, optname, mp, p));
                } else
                        return (ENOPROTOOPT);
        } else {
                m = m_get(M_WAIT, MT_SOOPTS);
                m->m_len = sizeof (int);

                switch (optname) {

                case SO_LINGER:
                        m->m_len = sizeof (struct linger);
                        mtod(m, struct linger *)->l_onoff =
                                so->so_options & SO_LINGER;
                        mtod(m, struct linger *)->l_linger = so->so_linger;
                        break;

                case SO_USELOOPBACK:
                case SO_DONTROUTE:
                case SO_DEBUG:
                case SO_KEEPALIVE:
                case SO_REUSEADDR:
                case SO_REUSEPORT:
                case SO_BROADCAST:
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
                        *mtod(m, int *) = so->so_options & optname;
                        break;

                case SO_TYPE:
                        *mtod(m, int *) = so->so_type;
                        break;

                case SO_ERROR:
                        /* Reading SO_ERROR clears the pending error. */
                        *mtod(m, int *) = so->so_error;
                        so->so_error = 0;
                        break;

                case SO_SNDBUF:
                        *mtod(m, int *) = so->so_snd.sb_hiwat;
                        break;

                case SO_RCVBUF:
                        *mtod(m, int *) = so->so_rcv.sb_hiwat;
                        break;

                case SO_SNDLOWAT:
                        *mtod(m, int *) = so->so_snd.sb_lowat;
                        break;

                case SO_RCVLOWAT:
                        *mtod(m, int *) = so->so_rcv.sb_lowat;
                        break;

                case SO_SNDTIMEO:
                case SO_RCVTIMEO:
                    {
                        /* Convert the stored tick count back to a timeval. */
                        int val = (optname == SO_SNDTIMEO ?
                             so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

                        m->m_len = sizeof(struct timeval);
                        mtod(m, struct timeval *)->tv_sec = val / hz;
                        mtod(m, struct timeval *)->tv_usec =
                            (val % hz) * tick;
                        break;
                    }

                default:
                        (void)m_free(m);
                        return (ENOPROTOOPT);
                }
                *mp = m;
                return (0);
        }
}

/*
 * Notify the owning process or process group that out-of-band data has
 * arrived (SIGURG), and wake any select/poll waiters on the receive side.
 */
void
sohasoutofband(so)
        register struct socket *so;
{
        struct proc *p;

        if (so->so_pgid < 0)
                gsignal(-so->so_pgid, SIGURG);
        else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
                psignal(p, SIGURG);
        selwakeup(&so->so_rcv.sb_sel);
}

/*
 * Poll backend for sockets: report readable/writable/OOB conditions
 * matching "events", or record the process for selwakeup if none are
 * currently true.
 */
int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
        int revents = 0;
        int s = splnet();

        if (events & (POLLIN | POLLRDNORM))
                if (soreadable(so))
                        revents |= events & (POLLIN | POLLRDNORM);

        if (events & (POLLOUT | POLLWRNORM))
                if (sowriteable(so))
                        revents |= events & (POLLOUT | POLLWRNORM);

        if (events & (POLLPRI | POLLRDBAND))
                if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
                        revents |= events & (POLLPRI | POLLRDBAND);

        if (revents == 0) {
                /* Nothing ready: arrange to be woken when it becomes so. */
                if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
                        selrecord(p, &so->so_rcv.sb_sel);
                        so->so_rcv.sb_flags |= SB_SEL;
                }

                if (events & (POLLOUT | POLLWRNORM)) {
                        selrecord(p, &so->so_snd.sb_sel);
                        so->so_snd.sb_flags |= SB_SEL;
                }
        }

        splx(s);
        return (revents);
}