/* uipc_socket.c — FreeBSD revision 42902 */
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.49 1999/01/10 01:58:25 eivind Exp $ 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/proc.h> 40#include <sys/fcntl.h> 41#include <sys/malloc.h> 42#include <sys/mbuf.h> 43#include <sys/domain.h> 44#include <sys/kernel.h> 45#include <sys/poll.h> 46#include <sys/protosw.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/resourcevar.h> 50#include <sys/signalvar.h> 51#include <sys/sysctl.h> 52#include <sys/uio.h> 53#include <vm/vm_zone.h> 54 55#include <machine/limits.h> 56 57struct vm_zone *socket_zone; 58so_gen_t so_gencnt; /* generation count for sockets */ 59 60MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 61MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 62 63static int somaxconn = SOMAXCONN; 64SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 65 0, ""); 66 67/* 68 * Socket operation routines. 69 * These routines are called by the routines in 70 * sys_socket.c or from a system process, and 71 * implement the semantics of socket operations by 72 * switching out to the protocol specific routines. 73 */ 74 75/* 76 * Get a socket structure from our zone, and initialize it. 77 * We don't implement `waitok' yet (see comments in uipc_domain.c). 78 * Note that it would probably be better to allocate socket 79 * and PCB at the same time, but I'm not convinced that all 80 * the protocols can be easily modified to do this. 
81 */ 82struct socket * 83soalloc(waitok) 84 int waitok; 85{ 86 struct socket *so; 87 88 so = zalloci(socket_zone); 89 if (so) { 90 /* XXX race condition for reentrant kernel */ 91 bzero(so, sizeof *so); 92 so->so_gencnt = ++so_gencnt; 93 so->so_zone = socket_zone; 94 } 95 return so; 96} 97 98int 99socreate(dom, aso, type, proto, p) 100 int dom; 101 struct socket **aso; 102 register int type; 103 int proto; 104 struct proc *p; 105{ 106 register struct protosw *prp; 107 register struct socket *so; 108 register int error; 109 110 if (proto) 111 prp = pffindproto(dom, proto, type); 112 else 113 prp = pffindtype(dom, type); 114 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 115 return (EPROTONOSUPPORT); 116 if (prp->pr_type != type) 117 return (EPROTOTYPE); 118 so = soalloc(p != 0); 119 if (so == 0) 120 return (ENOBUFS); 121 122 TAILQ_INIT(&so->so_incomp); 123 TAILQ_INIT(&so->so_comp); 124 so->so_type = type; 125 if (p != 0) 126 so->so_uid = p->p_ucred->cr_uid; 127 so->so_proto = prp; 128 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 129 if (error) { 130 so->so_state |= SS_NOFDREF; 131 sofree(so); 132 return (error); 133 } 134 *aso = so; 135 return (0); 136} 137 138int 139sobind(so, nam, p) 140 struct socket *so; 141 struct sockaddr *nam; 142 struct proc *p; 143{ 144 int s = splnet(); 145 int error; 146 147 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 148 splx(s); 149 return (error); 150} 151 152void 153sodealloc(so) 154 struct socket *so; 155{ 156 so->so_gencnt = ++so_gencnt; 157 zfreei(so->so_zone, so); 158} 159 160int 161solisten(so, backlog, p) 162 register struct socket *so; 163 int backlog; 164 struct proc *p; 165{ 166 int s, error; 167 168 s = splnet(); 169 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 170 if (error) { 171 splx(s); 172 return (error); 173 } 174 if (so->so_comp.tqh_first == NULL) 175 so->so_options |= SO_ACCEPTCONN; 176 if (backlog < 0 || backlog > somaxconn) 177 backlog = somaxconn; 178 so->so_qlimit = backlog; 
179 splx(s); 180 return (0); 181} 182 183void 184sofree(so) 185 register struct socket *so; 186{ 187 struct socket *head = so->so_head; 188 189 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 190 return; 191 if (head != NULL) { 192 if (so->so_state & SS_INCOMP) { 193 TAILQ_REMOVE(&head->so_incomp, so, so_list); 194 head->so_incqlen--; 195 } else if (so->so_state & SS_COMP) { 196 TAILQ_REMOVE(&head->so_comp, so, so_list); 197 } else { 198 panic("sofree: not queued"); 199 } 200 head->so_qlen--; 201 so->so_state &= ~(SS_INCOMP|SS_COMP); 202 so->so_head = NULL; 203 } 204 sbrelease(&so->so_snd); 205 sorflush(so); 206 sodealloc(so); 207} 208 209/* 210 * Close a socket on last file table reference removal. 211 * Initiate disconnect if connected. 212 * Free socket when disconnect complete. 213 */ 214int 215soclose(so) 216 register struct socket *so; 217{ 218 int s = splnet(); /* conservative */ 219 int error = 0; 220 221 funsetown(so->so_sigio); 222 if (so->so_options & SO_ACCEPTCONN) { 223 struct socket *sp, *sonext; 224 225 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { 226 sonext = sp->so_list.tqe_next; 227 (void) soabort(sp); 228 } 229 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { 230 sonext = sp->so_list.tqe_next; 231 (void) soabort(sp); 232 } 233 } 234 if (so->so_pcb == 0) 235 goto discard; 236 if (so->so_state & SS_ISCONNECTED) { 237 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 238 error = sodisconnect(so); 239 if (error) 240 goto drop; 241 } 242 if (so->so_options & SO_LINGER) { 243 if ((so->so_state & SS_ISDISCONNECTING) && 244 (so->so_state & SS_NBIO)) 245 goto drop; 246 while (so->so_state & SS_ISCONNECTED) { 247 error = tsleep((caddr_t)&so->so_timeo, 248 PSOCK | PCATCH, "soclos", so->so_linger); 249 if (error) 250 break; 251 } 252 } 253 } 254drop: 255 if (so->so_pcb) { 256 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 257 if (error == 0) 258 error = error2; 259 } 260discard: 261 if (so->so_state & SS_NOFDREF) 262 
panic("soclose: NOFDREF"); 263 so->so_state |= SS_NOFDREF; 264 sofree(so); 265 splx(s); 266 return (error); 267} 268 269/* 270 * Must be called at splnet... 271 */ 272int 273soabort(so) 274 struct socket *so; 275{ 276 277 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 278} 279 280int 281soaccept(so, nam) 282 register struct socket *so; 283 struct sockaddr **nam; 284{ 285 int s = splnet(); 286 int error; 287 288 if ((so->so_state & SS_NOFDREF) == 0) 289 panic("soaccept: !NOFDREF"); 290 so->so_state &= ~SS_NOFDREF; 291 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 292 splx(s); 293 return (error); 294} 295 296int 297soconnect(so, nam, p) 298 register struct socket *so; 299 struct sockaddr *nam; 300 struct proc *p; 301{ 302 int s; 303 int error; 304 305 if (so->so_options & SO_ACCEPTCONN) 306 return (EOPNOTSUPP); 307 s = splnet(); 308 /* 309 * If protocol is connection-based, can only connect once. 310 * Otherwise, if connected, try to disconnect first. 311 * This allows user to disconnect by connecting to, e.g., 312 * a null address. 
313 */ 314 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 315 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 316 (error = sodisconnect(so)))) 317 error = EISCONN; 318 else 319 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 320 splx(s); 321 return (error); 322} 323 324int 325soconnect2(so1, so2) 326 register struct socket *so1; 327 struct socket *so2; 328{ 329 int s = splnet(); 330 int error; 331 332 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 333 splx(s); 334 return (error); 335} 336 337int 338sodisconnect(so) 339 register struct socket *so; 340{ 341 int s = splnet(); 342 int error; 343 344 if ((so->so_state & SS_ISCONNECTED) == 0) { 345 error = ENOTCONN; 346 goto bad; 347 } 348 if (so->so_state & SS_ISDISCONNECTING) { 349 error = EALREADY; 350 goto bad; 351 } 352 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 353bad: 354 splx(s); 355 return (error); 356} 357 358#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 359/* 360 * Send on a socket. 361 * If send must go all at once and message is larger than 362 * send buffering, then hard error. 363 * Lock against other senders. 364 * If must go all at once and not enough room now, then 365 * inform user that this would block and do nothing. 366 * Otherwise, if nonblocking, send as much as possible. 367 * The data to be sent is described by "uio" if nonzero, 368 * otherwise by the mbuf chain "top" (which must be null 369 * if uio is not). Data provided in mbuf chain must be small 370 * enough to send all at once. 371 * 372 * Returns nonzero on error, timeout or signal; callers 373 * must check for short counts if EINTR/ERESTART are returned. 374 * Data and control buffers are freed on return. 
375 */ 376int 377sosend(so, addr, uio, top, control, flags, p) 378 register struct socket *so; 379 struct sockaddr *addr; 380 struct uio *uio; 381 struct mbuf *top; 382 struct mbuf *control; 383 int flags; 384 struct proc *p; 385{ 386 struct mbuf **mp; 387 register struct mbuf *m; 388 register long space, len, resid; 389 int clen = 0, error, s, dontroute, mlen; 390 int atomic = sosendallatonce(so) || top; 391 392 if (uio) 393 resid = uio->uio_resid; 394 else 395 resid = top->m_pkthdr.len; 396 /* 397 * In theory resid should be unsigned. 398 * However, space must be signed, as it might be less than 0 399 * if we over-committed, and we must use a signed comparison 400 * of space and resid. On the other hand, a negative resid 401 * causes us to loop sending 0-length segments to the protocol. 402 * 403 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 404 * type sockets since that's an error. 405 */ 406 if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { 407 error = EINVAL; 408 goto out; 409 } 410 411 dontroute = 412 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 413 (so->so_proto->pr_flags & PR_ATOMIC); 414 if (p) 415 p->p_stats->p_ru.ru_msgsnd++; 416 if (control) 417 clen = control->m_len; 418#define snderr(errno) { error = errno; splx(s); goto release; } 419 420restart: 421 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 422 if (error) 423 goto out; 424 do { 425 s = splnet(); 426 if (so->so_state & SS_CANTSENDMORE) 427 snderr(EPIPE); 428 if (so->so_error) { 429 error = so->so_error; 430 so->so_error = 0; 431 splx(s); 432 goto release; 433 } 434 if ((so->so_state & SS_ISCONNECTED) == 0) { 435 /* 436 * `sendto' and `sendmsg' is allowed on a connection- 437 * based socket if it supports implied connect. 438 * Return ENOTCONN if not connected and no address is 439 * supplied. 
440 */ 441 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 442 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 443 if ((so->so_state & SS_ISCONFIRMING) == 0 && 444 !(resid == 0 && clen != 0)) 445 snderr(ENOTCONN); 446 } else if (addr == 0) 447 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 448 ENOTCONN : EDESTADDRREQ); 449 } 450 space = sbspace(&so->so_snd); 451 if (flags & MSG_OOB) 452 space += 1024; 453 if ((atomic && resid > so->so_snd.sb_hiwat) || 454 clen > so->so_snd.sb_hiwat) 455 snderr(EMSGSIZE); 456 if (space < resid + clen && uio && 457 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 458 if (so->so_state & SS_NBIO) 459 snderr(EWOULDBLOCK); 460 sbunlock(&so->so_snd); 461 error = sbwait(&so->so_snd); 462 splx(s); 463 if (error) 464 goto out; 465 goto restart; 466 } 467 splx(s); 468 mp = ⊤ 469 space -= clen; 470 do { 471 if (uio == NULL) { 472 /* 473 * Data is prepackaged in "top". 474 */ 475 resid = 0; 476 if (flags & MSG_EOR) 477 top->m_flags |= M_EOR; 478 } else do { 479 if (top == 0) { 480 MGETHDR(m, M_WAIT, MT_DATA); 481 mlen = MHLEN; 482 m->m_pkthdr.len = 0; 483 m->m_pkthdr.rcvif = (struct ifnet *)0; 484 } else { 485 MGET(m, M_WAIT, MT_DATA); 486 mlen = MLEN; 487 } 488 if (resid >= MINCLSIZE) { 489 MCLGET(m, M_WAIT); 490 if ((m->m_flags & M_EXT) == 0) 491 goto nopages; 492 mlen = MCLBYTES; 493 len = min(min(mlen, resid), space); 494 } else { 495nopages: 496 len = min(min(mlen, resid), space); 497 /* 498 * For datagram protocols, leave room 499 * for protocol headers in first mbuf. 
500 */ 501 if (atomic && top == 0 && len < mlen) 502 MH_ALIGN(m, len); 503 } 504 space -= len; 505 error = uiomove(mtod(m, caddr_t), (int)len, uio); 506 resid = uio->uio_resid; 507 m->m_len = len; 508 *mp = m; 509 top->m_pkthdr.len += len; 510 if (error) 511 goto release; 512 mp = &m->m_next; 513 if (resid <= 0) { 514 if (flags & MSG_EOR) 515 top->m_flags |= M_EOR; 516 break; 517 } 518 } while (space > 0 && atomic); 519 if (dontroute) 520 so->so_options |= SO_DONTROUTE; 521 s = splnet(); /* XXX */ 522 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 523 (flags & MSG_OOB) ? PRUS_OOB : 524 /* 525 * If the user set MSG_EOF, the protocol 526 * understands this flag and nothing left to 527 * send then use PRU_SEND_EOF instead of PRU_SEND. 528 */ 529 ((flags & MSG_EOF) && 530 (so->so_proto->pr_flags & PR_IMPLOPCL) && 531 (resid <= 0)) ? 532 PRUS_EOF : 533 /* If there is more to send set PRUS_MORETOCOME */ 534 (resid > 0) ? PRUS_MORETOCOME : 0, 535 top, addr, control, p); 536 splx(s); 537 if (dontroute) 538 so->so_options &= ~SO_DONTROUTE; 539 clen = 0; 540 control = 0; 541 top = 0; 542 mp = ⊤ 543 if (error) 544 goto release; 545 } while (resid && space > 0); 546 } while (resid); 547 548release: 549 sbunlock(&so->so_snd); 550out: 551 if (top) 552 m_freem(top); 553 if (control) 554 m_freem(control); 555 return (error); 556} 557 558/* 559 * Implement receive operations on a socket. 560 * We depend on the way that records are added to the sockbuf 561 * by sbappend*. In particular, each record (mbufs linked through m_next) 562 * must begin with an address if the protocol so specifies, 563 * followed by an optional mbuf or mbufs containing ancillary data, 564 * and then zero or more mbufs of data. 565 * In order to avoid blocking network interrupts for the entire time here, 566 * we splx() while doing the actual copy to user space. 567 * Although the sockbuf is locked, new data may still be appended, 568 * and thus we must maintain consistency of the sockbuf during that time. 
569 * 570 * The caller may receive the data as a single mbuf chain by supplying 571 * an mbuf **mp0 for use in returning the chain. The uio is then used 572 * only for the count in uio_resid. 573 */ 574int 575soreceive(so, psa, uio, mp0, controlp, flagsp) 576 register struct socket *so; 577 struct sockaddr **psa; 578 struct uio *uio; 579 struct mbuf **mp0; 580 struct mbuf **controlp; 581 int *flagsp; 582{ 583 register struct mbuf *m, **mp; 584 register int flags, len, error, s, offset; 585 struct protosw *pr = so->so_proto; 586 struct mbuf *nextrecord; 587 int moff, type = 0; 588 int orig_resid = uio->uio_resid; 589 590 mp = mp0; 591 if (psa) 592 *psa = 0; 593 if (controlp) 594 *controlp = 0; 595 if (flagsp) 596 flags = *flagsp &~ MSG_EOR; 597 else 598 flags = 0; 599 if (flags & MSG_OOB) { 600 m = m_get(M_WAIT, MT_DATA); 601 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 602 if (error) 603 goto bad; 604 do { 605 error = uiomove(mtod(m, caddr_t), 606 (int) min(uio->uio_resid, m->m_len), uio); 607 m = m_free(m); 608 } while (uio->uio_resid && error == 0 && m); 609bad: 610 if (m) 611 m_freem(m); 612 return (error); 613 } 614 if (mp) 615 *mp = (struct mbuf *)0; 616 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 617 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 618 619restart: 620 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 621 if (error) 622 return (error); 623 s = splnet(); 624 625 m = so->so_rcv.sb_mb; 626 /* 627 * If we have less data than requested, block awaiting more 628 * (subject to any timeout) if: 629 * 1. the current count is less than the low water mark, or 630 * 2. MSG_WAITALL is set, and it is possible to do the entire 631 * receive operation at once if we block (resid <= hiwat). 632 * 3. MSG_DONTWAIT is not set 633 * If MSG_WAITALL is set but resid is larger than the receive buffer, 634 * we have to do the receive in sections, and thus risk returning 635 * a short count if a timeout or signal occurs after we start. 
636 */ 637 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 638 so->so_rcv.sb_cc < uio->uio_resid) && 639 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 640 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 641 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 642 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); 643 if (so->so_error) { 644 if (m) 645 goto dontblock; 646 error = so->so_error; 647 if ((flags & MSG_PEEK) == 0) 648 so->so_error = 0; 649 goto release; 650 } 651 if (so->so_state & SS_CANTRCVMORE) { 652 if (m) 653 goto dontblock; 654 else 655 goto release; 656 } 657 for (; m; m = m->m_next) 658 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 659 m = so->so_rcv.sb_mb; 660 goto dontblock; 661 } 662 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 663 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 664 error = ENOTCONN; 665 goto release; 666 } 667 if (uio->uio_resid == 0) 668 goto release; 669 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 670 error = EWOULDBLOCK; 671 goto release; 672 } 673 sbunlock(&so->so_rcv); 674 error = sbwait(&so->so_rcv); 675 splx(s); 676 if (error) 677 return (error); 678 goto restart; 679 } 680dontblock: 681 if (uio->uio_procp) 682 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 683 nextrecord = m->m_nextpkt; 684 if (pr->pr_flags & PR_ADDR) { 685 KASSERT(m->m_type == MT_SONAME, ("receive 1a")); 686 orig_resid = 0; 687 if (psa) 688 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 689 mp0 == 0); 690 if (flags & MSG_PEEK) { 691 m = m->m_next; 692 } else { 693 sbfree(&so->so_rcv, m); 694 MFREE(m, so->so_rcv.sb_mb); 695 m = so->so_rcv.sb_mb; 696 } 697 } 698 while (m && m->m_type == MT_CONTROL && error == 0) { 699 if (flags & MSG_PEEK) { 700 if (controlp) 701 *controlp = m_copy(m, 0, m->m_len); 702 m = m->m_next; 703 } else { 704 sbfree(&so->so_rcv, m); 705 if (controlp) { 706 if (pr->pr_domain->dom_externalize && 707 mtod(m, struct cmsghdr *)->cmsg_type == 708 SCM_RIGHTS) 709 error = 
(*pr->pr_domain->dom_externalize)(m); 710 *controlp = m; 711 so->so_rcv.sb_mb = m->m_next; 712 m->m_next = 0; 713 m = so->so_rcv.sb_mb; 714 } else { 715 MFREE(m, so->so_rcv.sb_mb); 716 m = so->so_rcv.sb_mb; 717 } 718 } 719 if (controlp) { 720 orig_resid = 0; 721 controlp = &(*controlp)->m_next; 722 } 723 } 724 if (m) { 725 if ((flags & MSG_PEEK) == 0) 726 m->m_nextpkt = nextrecord; 727 type = m->m_type; 728 if (type == MT_OOBDATA) 729 flags |= MSG_OOB; 730 } 731 moff = 0; 732 offset = 0; 733 while (m && uio->uio_resid > 0 && error == 0) { 734 if (m->m_type == MT_OOBDATA) { 735 if (type != MT_OOBDATA) 736 break; 737 } else if (type == MT_OOBDATA) 738 break; 739 else 740 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 741 ("receive 3")); 742 so->so_state &= ~SS_RCVATMARK; 743 len = uio->uio_resid; 744 if (so->so_oobmark && len > so->so_oobmark - offset) 745 len = so->so_oobmark - offset; 746 if (len > m->m_len - moff) 747 len = m->m_len - moff; 748 /* 749 * If mp is set, just pass back the mbufs. 750 * Otherwise copy them out via the uio, then free. 751 * Sockbuf must be consistent here (points to current mbuf, 752 * it points to next record) when we drop priority; 753 * we must note any additions to the sockbuf when we 754 * block interrupts again. 
755 */ 756 if (mp == 0) { 757 splx(s); 758 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 759 s = splnet(); 760 if (error) 761 goto release; 762 } else 763 uio->uio_resid -= len; 764 if (len == m->m_len - moff) { 765 if (m->m_flags & M_EOR) 766 flags |= MSG_EOR; 767 if (flags & MSG_PEEK) { 768 m = m->m_next; 769 moff = 0; 770 } else { 771 nextrecord = m->m_nextpkt; 772 sbfree(&so->so_rcv, m); 773 if (mp) { 774 *mp = m; 775 mp = &m->m_next; 776 so->so_rcv.sb_mb = m = m->m_next; 777 *mp = (struct mbuf *)0; 778 } else { 779 MFREE(m, so->so_rcv.sb_mb); 780 m = so->so_rcv.sb_mb; 781 } 782 if (m) 783 m->m_nextpkt = nextrecord; 784 } 785 } else { 786 if (flags & MSG_PEEK) 787 moff += len; 788 else { 789 if (mp) 790 *mp = m_copym(m, 0, len, M_WAIT); 791 m->m_data += len; 792 m->m_len -= len; 793 so->so_rcv.sb_cc -= len; 794 } 795 } 796 if (so->so_oobmark) { 797 if ((flags & MSG_PEEK) == 0) { 798 so->so_oobmark -= len; 799 if (so->so_oobmark == 0) { 800 so->so_state |= SS_RCVATMARK; 801 break; 802 } 803 } else { 804 offset += len; 805 if (offset == so->so_oobmark) 806 break; 807 } 808 } 809 if (flags & MSG_EOR) 810 break; 811 /* 812 * If the MSG_WAITALL flag is set (for non-atomic socket), 813 * we must not quit until "uio->uio_resid == 0" or an error 814 * termination. If a signal/timeout occurs, return 815 * with a short count but without error. 816 * Keep sockbuf locked against other readers. 
817 */ 818 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 819 !sosendallatonce(so) && !nextrecord) { 820 if (so->so_error || so->so_state & SS_CANTRCVMORE) 821 break; 822 error = sbwait(&so->so_rcv); 823 if (error) { 824 sbunlock(&so->so_rcv); 825 splx(s); 826 return (0); 827 } 828 m = so->so_rcv.sb_mb; 829 if (m) 830 nextrecord = m->m_nextpkt; 831 } 832 } 833 834 if (m && pr->pr_flags & PR_ATOMIC) { 835 flags |= MSG_TRUNC; 836 if ((flags & MSG_PEEK) == 0) 837 (void) sbdroprecord(&so->so_rcv); 838 } 839 if ((flags & MSG_PEEK) == 0) { 840 if (m == 0) 841 so->so_rcv.sb_mb = nextrecord; 842 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 843 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 844 } 845 if (orig_resid == uio->uio_resid && orig_resid && 846 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 847 sbunlock(&so->so_rcv); 848 splx(s); 849 goto restart; 850 } 851 852 if (flagsp) 853 *flagsp |= flags; 854release: 855 sbunlock(&so->so_rcv); 856 splx(s); 857 return (error); 858} 859 860int 861soshutdown(so, how) 862 register struct socket *so; 863 register int how; 864{ 865 register struct protosw *pr = so->so_proto; 866 867 how++; 868 if (how & FREAD) 869 sorflush(so); 870 if (how & FWRITE) 871 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 872 return (0); 873} 874 875void 876sorflush(so) 877 register struct socket *so; 878{ 879 register struct sockbuf *sb = &so->so_rcv; 880 register struct protosw *pr = so->so_proto; 881 register int s; 882 struct sockbuf asb; 883 884 sb->sb_flags |= SB_NOINTR; 885 (void) sblock(sb, M_WAITOK); 886 s = splimp(); 887 socantrcvmore(so); 888 sbunlock(sb); 889 asb = *sb; 890 bzero((caddr_t)sb, sizeof (*sb)); 891 splx(s); 892 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 893 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 894 sbrelease(&asb); 895} 896 897/* 898 * Perhaps this routine, and sooptcopyout(), below, ought to come in 899 * an additional variant to handle the case where the option value needs 900 * 
to be some kind of integer, but not a specific size. 901 * In addition to their use here, these functions are also called by the 902 * protocol-level pr_ctloutput() routines. 903 */ 904int 905sooptcopyin(sopt, buf, len, minlen) 906 struct sockopt *sopt; 907 void *buf; 908 size_t len; 909 size_t minlen; 910{ 911 size_t valsize; 912 913 /* 914 * If the user gives us more than we wanted, we ignore it, 915 * but if we don't get the minimum length the caller 916 * wants, we return EINVAL. On success, sopt->sopt_valsize 917 * is set to however much we actually retrieved. 918 */ 919 if ((valsize = sopt->sopt_valsize) < minlen) 920 return EINVAL; 921 if (valsize > len) 922 sopt->sopt_valsize = valsize = len; 923 924 if (sopt->sopt_p != 0) 925 return (copyin(sopt->sopt_val, buf, valsize)); 926 927 bcopy(sopt->sopt_val, buf, valsize); 928 return 0; 929} 930 931int 932sosetopt(so, sopt) 933 struct socket *so; 934 struct sockopt *sopt; 935{ 936 int error, optval; 937 struct linger l; 938 struct timeval tv; 939 short val; 940 941 error = 0; 942 if (sopt->sopt_level != SOL_SOCKET) { 943 if (so->so_proto && so->so_proto->pr_ctloutput) 944 return ((*so->so_proto->pr_ctloutput) 945 (so, sopt)); 946 error = ENOPROTOOPT; 947 } else { 948 switch (sopt->sopt_name) { 949 case SO_LINGER: 950 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 951 if (error) 952 goto bad; 953 954 so->so_linger = l.l_linger; 955 if (l.l_onoff) 956 so->so_options |= SO_LINGER; 957 else 958 so->so_options &= ~SO_LINGER; 959 break; 960 961 case SO_DEBUG: 962 case SO_KEEPALIVE: 963 case SO_DONTROUTE: 964 case SO_USELOOPBACK: 965 case SO_BROADCAST: 966 case SO_REUSEADDR: 967 case SO_REUSEPORT: 968 case SO_OOBINLINE: 969 case SO_TIMESTAMP: 970 error = sooptcopyin(sopt, &optval, sizeof optval, 971 sizeof optval); 972 if (error) 973 goto bad; 974 if (optval) 975 so->so_options |= sopt->sopt_name; 976 else 977 so->so_options &= ~sopt->sopt_name; 978 break; 979 980 case SO_SNDBUF: 981 case SO_RCVBUF: 982 case 
SO_SNDLOWAT: 983 case SO_RCVLOWAT: 984 error = sooptcopyin(sopt, &optval, sizeof optval, 985 sizeof optval); 986 if (error) 987 goto bad; 988 989 /* 990 * Values < 1 make no sense for any of these 991 * options, so disallow them. 992 */ 993 if (optval < 1) { 994 error = EINVAL; 995 goto bad; 996 } 997 998 switch (sopt->sopt_name) { 999 case SO_SNDBUF: 1000 case SO_RCVBUF: 1001 if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 1002 &so->so_snd : &so->so_rcv, 1003 (u_long) optval) == 0) { 1004 error = ENOBUFS; 1005 goto bad; 1006 } 1007 break; 1008 1009 /* 1010 * Make sure the low-water is never greater than 1011 * the high-water. 1012 */ 1013 case SO_SNDLOWAT: 1014 so->so_snd.sb_lowat = 1015 (optval > so->so_snd.sb_hiwat) ? 1016 so->so_snd.sb_hiwat : optval; 1017 break; 1018 case SO_RCVLOWAT: 1019 so->so_rcv.sb_lowat = 1020 (optval > so->so_rcv.sb_hiwat) ? 1021 so->so_rcv.sb_hiwat : optval; 1022 break; 1023 } 1024 break; 1025 1026 case SO_SNDTIMEO: 1027 case SO_RCVTIMEO: 1028 error = sooptcopyin(sopt, &tv, sizeof tv, 1029 sizeof tv); 1030 if (error) 1031 goto bad; 1032 1033 if (tv.tv_sec > SHRT_MAX / hz - hz) { 1034 error = EDOM; 1035 goto bad; 1036 } 1037 val = tv.tv_sec * hz + tv.tv_usec / tick; 1038 1039 switch (sopt->sopt_name) { 1040 case SO_SNDTIMEO: 1041 so->so_snd.sb_timeo = val; 1042 break; 1043 case SO_RCVTIMEO: 1044 so->so_rcv.sb_timeo = val; 1045 break; 1046 } 1047 break; 1048 1049 default: 1050 error = ENOPROTOOPT; 1051 break; 1052 } 1053 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1054 (void) ((*so->so_proto->pr_ctloutput) 1055 (so, sopt)); 1056 } 1057 } 1058bad: 1059 return (error); 1060} 1061 1062/* Helper routine for getsockopt */ 1063int 1064sooptcopyout(sopt, buf, len) 1065 struct sockopt *sopt; 1066 void *buf; 1067 size_t len; 1068{ 1069 int error; 1070 size_t valsize; 1071 1072 error = 0; 1073 1074 /* 1075 * Documented get behavior is that we always return a value, 1076 * possibly truncated to fit in the user's buffer. 
1077 * Traditional behavior is that we always tell the user 1078 * precisely how much we copied, rather than something useful 1079 * like the total amount we had available for her. 1080 * Note that this interface is not idempotent; the entire answer must 1081 * generated ahead of time. 1082 */ 1083 valsize = min(len, sopt->sopt_valsize); 1084 sopt->sopt_valsize = valsize; 1085 if (sopt->sopt_val != 0) { 1086 if (sopt->sopt_p != 0) 1087 error = copyout(buf, sopt->sopt_val, valsize); 1088 else 1089 bcopy(buf, sopt->sopt_val, valsize); 1090 } 1091 return error; 1092} 1093 1094int 1095sogetopt(so, sopt) 1096 struct socket *so; 1097 struct sockopt *sopt; 1098{ 1099 int error, optval; 1100 struct linger l; 1101 struct timeval tv; 1102 1103 error = 0; 1104 if (sopt->sopt_level != SOL_SOCKET) { 1105 if (so->so_proto && so->so_proto->pr_ctloutput) { 1106 return ((*so->so_proto->pr_ctloutput) 1107 (so, sopt)); 1108 } else 1109 return (ENOPROTOOPT); 1110 } else { 1111 switch (sopt->sopt_name) { 1112 case SO_LINGER: 1113 l.l_onoff = so->so_options & SO_LINGER; 1114 l.l_linger = so->so_linger; 1115 error = sooptcopyout(sopt, &l, sizeof l); 1116 break; 1117 1118 case SO_USELOOPBACK: 1119 case SO_DONTROUTE: 1120 case SO_DEBUG: 1121 case SO_KEEPALIVE: 1122 case SO_REUSEADDR: 1123 case SO_REUSEPORT: 1124 case SO_BROADCAST: 1125 case SO_OOBINLINE: 1126 case SO_TIMESTAMP: 1127 optval = so->so_options & sopt->sopt_name; 1128integer: 1129 error = sooptcopyout(sopt, &optval, sizeof optval); 1130 break; 1131 1132 case SO_TYPE: 1133 optval = so->so_type; 1134 goto integer; 1135 1136 case SO_ERROR: 1137 optval = so->so_error; 1138 so->so_error = 0; 1139 goto integer; 1140 1141 case SO_SNDBUF: 1142 optval = so->so_snd.sb_hiwat; 1143 goto integer; 1144 1145 case SO_RCVBUF: 1146 optval = so->so_rcv.sb_hiwat; 1147 goto integer; 1148 1149 case SO_SNDLOWAT: 1150 optval = so->so_snd.sb_lowat; 1151 goto integer; 1152 1153 case SO_RCVLOWAT: 1154 optval = so->so_rcv.sb_lowat; 1155 goto integer; 1156 
1157 case SO_SNDTIMEO: 1158 case SO_RCVTIMEO: 1159 optval = (sopt->sopt_name == SO_SNDTIMEO ? 1160 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1161 1162 tv.tv_sec = optval / hz; 1163 tv.tv_usec = (optval % hz) * tick; 1164 error = sooptcopyout(sopt, &tv, sizeof tv); 1165 break; 1166 1167 default: 1168 error = ENOPROTOOPT; 1169 break; 1170 } 1171 return (error); 1172 } 1173} 1174 1175void 1176sohasoutofband(so) 1177 register struct socket *so; 1178{ 1179 if (so->so_sigio != NULL) 1180 pgsigio(so->so_sigio, SIGURG, 0); 1181 selwakeup(&so->so_rcv.sb_sel); 1182} 1183 1184int 1185sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1186{ 1187 int revents = 0; 1188 int s = splnet(); 1189 1190 if (events & (POLLIN | POLLRDNORM)) 1191 if (soreadable(so)) 1192 revents |= events & (POLLIN | POLLRDNORM); 1193 1194 if (events & (POLLOUT | POLLWRNORM)) 1195 if (sowriteable(so)) 1196 revents |= events & (POLLOUT | POLLWRNORM); 1197 1198 if (events & (POLLPRI | POLLRDBAND)) 1199 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1200 revents |= events & (POLLPRI | POLLRDBAND); 1201 1202 if (revents == 0) { 1203 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1204 selrecord(p, &so->so_rcv.sb_sel); 1205 so->so_rcv.sb_flags |= SB_SEL; 1206 } 1207 1208 if (events & (POLLOUT | POLLWRNORM)) { 1209 selrecord(p, &so->so_snd.sb_sel); 1210 so->so_snd.sb_flags |= SB_SEL; 1211 } 1212 } 1213 1214 splx(s); 1215 return (revents); 1216} 1217