uipc_socket.c revision 41086
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34 * $Id: uipc_socket.c,v 1.45 1998/08/31 18:07:23 wollman Exp $ 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/proc.h> 40#include <sys/fcntl.h> 41#include <sys/malloc.h> 42#include <sys/mbuf.h> 43#include <sys/domain.h> 44#include <sys/kernel.h> 45#include <sys/poll.h> 46#include <sys/protosw.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/resourcevar.h> 50#include <sys/signalvar.h> 51#include <sys/sysctl.h> 52#include <sys/uio.h> 53#include <vm/vm_zone.h> 54 55#include <machine/limits.h> 56 57struct vm_zone *socket_zone; 58so_gen_t so_gencnt; /* generation count for sockets */ 59 60MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 61MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 62 63static int somaxconn = SOMAXCONN; 64SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 65 0, ""); 66 67/* 68 * Socket operation routines. 69 * These routines are called by the routines in 70 * sys_socket.c or from a system process, and 71 * implement the semantics of socket operations by 72 * switching out to the protocol specific routines. 73 */ 74 75/* 76 * Get a socket structure from our zone, and initialize it. 77 * We don't implement `waitok' yet (see comments in uipc_domain.c). 78 * Note that it would probably be better to allocate socket 79 * and PCB at the same time, but I'm not convinced that all 80 * the protocols can be easily modified to do this. 81 */ 82struct socket * 83soalloc(waitok) 84 int waitok; 85{ 86 struct socket *so; 87 88 so = zalloci(socket_zone); 89 if (so) { 90 /* XXX race condition for reentrant kernel */ 91 bzero(so, sizeof *so); 92 so->so_gencnt = ++so_gencnt; 93 so->so_zone = socket_zone; 94 } 95 return so; 96} 97 98int 99socreate(dom, aso, type, proto, p) 100 int dom; 101 struct socket **aso; 102 register int type; 103 int proto; 104 struct proc *p; 105{ 106 register struct protosw *prp; 107 register struct socket *so; 108 register int error; 109 110 if (proto) 111 prp = pffindproto(dom, proto, type); 112 else 113 prp = pffindtype(dom, type); 114 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 115 return (EPROTONOSUPPORT); 116 if (prp->pr_type != type) 117 return (EPROTOTYPE); 118 so = soalloc(p != 0); 119 if (so == 0) 120 return (ENOBUFS); 121 122 TAILQ_INIT(&so->so_incomp); 123 TAILQ_INIT(&so->so_comp); 124 so->so_type = type; 125 if (p != 0) 126 so->so_uid = p->p_ucred->cr_uid; 127 so->so_proto = prp; 128 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 129 if (error) { 130 so->so_state |= SS_NOFDREF; 131 sofree(so); 132 return (error); 133 } 134 *aso = so; 135 return (0); 136} 137 138int 139sobind(so, nam, p) 140 struct socket *so; 141 struct sockaddr *nam; 142 struct proc *p; 143{ 144 int s = splnet(); 145 int error; 146 147 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 148 splx(s); 149 return (error); 150} 151 152void 153sodealloc(so) 154 struct socket *so; 155{ 156 so->so_gencnt = ++so_gencnt; 157 zfreei(so->so_zone, so); 158} 159 160int 161solisten(so, backlog, p) 162 register struct socket *so; 163 int backlog; 164 struct proc *p; 165{ 166 int s, error; 167 168 s = splnet(); 169 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 170 if (error) { 171 splx(s); 172 return (error); 173 } 174 if (so->so_comp.tqh_first == NULL) 175 so->so_options |= SO_ACCEPTCONN; 176 if (backlog < 0 || backlog > somaxconn) 177 backlog = somaxconn; 178 so->so_qlimit = backlog; 179 splx(s); 180 return (0); 181} 182 183void 184sofree(so) 185 register struct socket *so; 186{ 187 struct socket *head = so->so_head; 188 189 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 190 return; 191 if (head != NULL) { 192 if (so->so_state & SS_INCOMP) { 193 TAILQ_REMOVE(&head->so_incomp, so, so_list); 194 head->so_incqlen--; 195 } else if (so->so_state & SS_COMP) { 196 TAILQ_REMOVE(&head->so_comp, so, so_list); 197 } else { 198 panic("sofree: not queued"); 199 } 200 head->so_qlen--; 201 so->so_state &= ~(SS_INCOMP|SS_COMP); 202 so->so_head = NULL; 203 } 204 sbrelease(&so->so_snd); 205 sorflush(so); 206 sodealloc(so); 207} 208 209/* 210 * Close a socket on last file table reference removal. 211 * Initiate disconnect if connected. 212 * Free socket when disconnect complete. 213 */ 214int 215soclose(so) 216 register struct socket *so; 217{ 218 int s = splnet(); /* conservative */ 219 int error = 0; 220 221 funsetown(so->so_sigio); 222 if (so->so_options & SO_ACCEPTCONN) { 223 struct socket *sp, *sonext; 224 225 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { 226 sonext = sp->so_list.tqe_next; 227 (void) soabort(sp); 228 } 229 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { 230 sonext = sp->so_list.tqe_next; 231 (void) soabort(sp); 232 } 233 } 234 if (so->so_pcb == 0) 235 goto discard; 236 if (so->so_state & SS_ISCONNECTED) { 237 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 238 error = sodisconnect(so); 239 if (error) 240 goto drop; 241 } 242 if (so->so_options & SO_LINGER) { 243 if ((so->so_state & SS_ISDISCONNECTING) && 244 (so->so_state & SS_NBIO)) 245 goto drop; 246 while (so->so_state & SS_ISCONNECTED) { 247 error = tsleep((caddr_t)&so->so_timeo, 248 PSOCK | PCATCH, "soclos", so->so_linger); 249 if (error) 250 break; 251 } 252 } 253 } 254drop: 255 if (so->so_pcb) { 256 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 257 if (error == 0) 258 error = error2; 259 } 260discard: 261 if (so->so_state & SS_NOFDREF) 262 panic("soclose: NOFDREF"); 263 so->so_state |= SS_NOFDREF; 264 sofree(so); 265 splx(s); 266 return (error); 267} 268 269/* 270 * Must be called at splnet... 271 */ 272int 273soabort(so) 274 struct socket *so; 275{ 276 277 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 278} 279 280int 281soaccept(so, nam) 282 register struct socket *so; 283 struct sockaddr **nam; 284{ 285 int s = splnet(); 286 int error; 287 288 if ((so->so_state & SS_NOFDREF) == 0) 289 panic("soaccept: !NOFDREF"); 290 so->so_state &= ~SS_NOFDREF; 291 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 292 splx(s); 293 return (error); 294} 295 296int 297soconnect(so, nam, p) 298 register struct socket *so; 299 struct sockaddr *nam; 300 struct proc *p; 301{ 302 int s; 303 int error; 304 305 if (so->so_options & SO_ACCEPTCONN) 306 return (EOPNOTSUPP); 307 s = splnet(); 308 /* 309 * If protocol is connection-based, can only connect once. 310 * Otherwise, if connected, try to disconnect first. 311 * This allows user to disconnect by connecting to, e.g., 312 * a null address. 313 */ 314 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 315 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 316 (error = sodisconnect(so)))) 317 error = EISCONN; 318 else 319 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 320 splx(s); 321 return (error); 322} 323 324int 325soconnect2(so1, so2) 326 register struct socket *so1; 327 struct socket *so2; 328{ 329 int s = splnet(); 330 int error; 331 332 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 333 splx(s); 334 return (error); 335} 336 337int 338sodisconnect(so) 339 register struct socket *so; 340{ 341 int s = splnet(); 342 int error; 343 344 if ((so->so_state & SS_ISCONNECTED) == 0) { 345 error = ENOTCONN; 346 goto bad; 347 } 348 if (so->so_state & SS_ISDISCONNECTING) { 349 error = EALREADY; 350 goto bad; 351 } 352 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 353bad: 354 splx(s); 355 return (error); 356} 357 358#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 359/* 360 * Send on a socket. 361 * If send must go all at once and message is larger than 362 * send buffering, then hard error. 363 * Lock against other senders. 364 * If must go all at once and not enough room now, then 365 * inform user that this would block and do nothing. 366 * Otherwise, if nonblocking, send as much as possible. 367 * The data to be sent is described by "uio" if nonzero, 368 * otherwise by the mbuf chain "top" (which must be null 369 * if uio is not). Data provided in mbuf chain must be small 370 * enough to send all at once. 371 * 372 * Returns nonzero on error, timeout or signal; callers 373 * must check for short counts if EINTR/ERESTART are returned. 374 * Data and control buffers are freed on return. 375 */ 376int 377sosend(so, addr, uio, top, control, flags, p) 378 register struct socket *so; 379 struct sockaddr *addr; 380 struct uio *uio; 381 struct mbuf *top; 382 struct mbuf *control; 383 int flags; 384 struct proc *p; 385{ 386 struct mbuf **mp; 387 register struct mbuf *m; 388 register long space, len, resid; 389 int clen = 0, error, s, dontroute, mlen; 390 int atomic = sosendallatonce(so) || top; 391 392 if (uio) 393 resid = uio->uio_resid; 394 else 395 resid = top->m_pkthdr.len; 396 /* 397 * In theory resid should be unsigned. 398 * However, space must be signed, as it might be less than 0 399 * if we over-committed, and we must use a signed comparison 400 * of space and resid. On the other hand, a negative resid 401 * causes us to loop sending 0-length segments to the protocol. 402 * 403 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 404 * type sockets since that's an error. 405 */ 406 if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { 407 error = EINVAL; 408 goto out; 409 } 410 411 dontroute = 412 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 413 (so->so_proto->pr_flags & PR_ATOMIC); 414 if (p) 415 p->p_stats->p_ru.ru_msgsnd++; 416 if (control) 417 clen = control->m_len; 418#define snderr(errno) { error = errno; splx(s); goto release; } 419 420restart: 421 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 422 if (error) 423 goto out; 424 do { 425 s = splnet(); 426 if (so->so_state & SS_CANTSENDMORE) 427 snderr(EPIPE); 428 if (so->so_error) { 429 error = so->so_error; 430 so->so_error = 0; 431 splx(s); 432 goto release; 433 } 434 if ((so->so_state & SS_ISCONNECTED) == 0) { 435 /* 436 * `sendto' and `sendmsg' is allowed on a connection- 437 * based socket if it supports implied connect. 438 * Return ENOTCONN if not connected and no address is 439 * supplied. 440 */ 441 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 442 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 443 if ((so->so_state & SS_ISCONFIRMING) == 0 && 444 !(resid == 0 && clen != 0)) 445 snderr(ENOTCONN); 446 } else if (addr == 0) 447 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 448 ENOTCONN : EDESTADDRREQ); 449 } 450 space = sbspace(&so->so_snd); 451 if (flags & MSG_OOB) 452 space += 1024; 453 if ((atomic && resid > so->so_snd.sb_hiwat) || 454 clen > so->so_snd.sb_hiwat) 455 snderr(EMSGSIZE); 456 if (space < resid + clen && uio && 457 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 458 if (so->so_state & SS_NBIO) 459 snderr(EWOULDBLOCK); 460 sbunlock(&so->so_snd); 461 error = sbwait(&so->so_snd); 462 splx(s); 463 if (error) 464 goto out; 465 goto restart; 466 } 467 splx(s); 468 mp = ⊤ 469 space -= clen; 470 do { 471 if (uio == NULL) { 472 /* 473 * Data is prepackaged in "top". 474 */ 475 resid = 0; 476 if (flags & MSG_EOR) 477 top->m_flags |= M_EOR; 478 } else do { 479 if (top == 0) { 480 MGETHDR(m, M_WAIT, MT_DATA); 481 mlen = MHLEN; 482 m->m_pkthdr.len = 0; 483 m->m_pkthdr.rcvif = (struct ifnet *)0; 484 } else { 485 MGET(m, M_WAIT, MT_DATA); 486 mlen = MLEN; 487 } 488 if (resid >= MINCLSIZE) { 489 MCLGET(m, M_WAIT); 490 if ((m->m_flags & M_EXT) == 0) 491 goto nopages; 492 mlen = MCLBYTES; 493 len = min(min(mlen, resid), space); 494 } else { 495nopages: 496 len = min(min(mlen, resid), space); 497 /* 498 * For datagram protocols, leave room 499 * for protocol headers in first mbuf. 500 */ 501 if (atomic && top == 0 && len < mlen) 502 MH_ALIGN(m, len); 503 } 504 space -= len; 505 error = uiomove(mtod(m, caddr_t), (int)len, uio); 506 resid = uio->uio_resid; 507 m->m_len = len; 508 *mp = m; 509 top->m_pkthdr.len += len; 510 if (error) 511 goto release; 512 mp = &m->m_next; 513 if (resid <= 0) { 514 if (flags & MSG_EOR) 515 top->m_flags |= M_EOR; 516 break; 517 } 518 } while (space > 0 && atomic); 519 if (dontroute) 520 so->so_options |= SO_DONTROUTE; 521 s = splnet(); /* XXX */ 522 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 523 (flags & MSG_OOB) ? PRUS_OOB : 524 /* 525 * If the user set MSG_EOF, the protocol 526 * understands this flag and nothing left to 527 * send then use PRU_SEND_EOF instead of PRU_SEND. 528 */ 529 ((flags & MSG_EOF) && 530 (so->so_proto->pr_flags & PR_IMPLOPCL) && 531 (resid <= 0)) ? 532 PRUS_EOF : 0, 533 top, addr, control, p); 534 splx(s); 535 if (dontroute) 536 so->so_options &= ~SO_DONTROUTE; 537 clen = 0; 538 control = 0; 539 top = 0; 540 mp = ⊤ 541 if (error) 542 goto release; 543 } while (resid && space > 0); 544 } while (resid); 545 546release: 547 sbunlock(&so->so_snd); 548out: 549 if (top) 550 m_freem(top); 551 if (control) 552 m_freem(control); 553 return (error); 554} 555 556/* 557 * Implement receive operations on a socket. 558 * We depend on the way that records are added to the sockbuf 559 * by sbappend*. In particular, each record (mbufs linked through m_next) 560 * must begin with an address if the protocol so specifies, 561 * followed by an optional mbuf or mbufs containing ancillary data, 562 * and then zero or more mbufs of data. 563 * In order to avoid blocking network interrupts for the entire time here, 564 * we splx() while doing the actual copy to user space. 565 * Although the sockbuf is locked, new data may still be appended, 566 * and thus we must maintain consistency of the sockbuf during that time. 567 * 568 * The caller may receive the data as a single mbuf chain by supplying 569 * an mbuf **mp0 for use in returning the chain. The uio is then used 570 * only for the count in uio_resid. 571 */ 572int 573soreceive(so, psa, uio, mp0, controlp, flagsp) 574 register struct socket *so; 575 struct sockaddr **psa; 576 struct uio *uio; 577 struct mbuf **mp0; 578 struct mbuf **controlp; 579 int *flagsp; 580{ 581 register struct mbuf *m, **mp; 582 register int flags, len, error, s, offset; 583 struct protosw *pr = so->so_proto; 584 struct mbuf *nextrecord; 585 int moff, type = 0; 586 int orig_resid = uio->uio_resid; 587 588 mp = mp0; 589 if (psa) 590 *psa = 0; 591 if (controlp) 592 *controlp = 0; 593 if (flagsp) 594 flags = *flagsp &~ MSG_EOR; 595 else 596 flags = 0; 597 if (flags & MSG_OOB) { 598 m = m_get(M_WAIT, MT_DATA); 599 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 600 if (error) 601 goto bad; 602 do { 603 error = uiomove(mtod(m, caddr_t), 604 (int) min(uio->uio_resid, m->m_len), uio); 605 m = m_free(m); 606 } while (uio->uio_resid && error == 0 && m); 607bad: 608 if (m) 609 m_freem(m); 610 return (error); 611 } 612 if (mp) 613 *mp = (struct mbuf *)0; 614 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 615 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 616 617restart: 618 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 619 if (error) 620 return (error); 621 s = splnet(); 622 623 m = so->so_rcv.sb_mb; 624 /* 625 * If we have less data than requested, block awaiting more 626 * (subject to any timeout) if: 627 * 1. the current count is less than the low water mark, or 628 * 2. MSG_WAITALL is set, and it is possible to do the entire 629 * receive operation at once if we block (resid <= hiwat). 630 * 3. MSG_DONTWAIT is not set 631 * If MSG_WAITALL is set but resid is larger than the receive buffer, 632 * we have to do the receive in sections, and thus risk returning 633 * a short count if a timeout or signal occurs after we start. 634 */ 635 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 636 so->so_rcv.sb_cc < uio->uio_resid) && 637 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 638 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 639 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 640#ifdef DIAGNOSTIC 641 if (m == 0 && so->so_rcv.sb_cc) 642 panic("receive 1"); 643#endif 644 if (so->so_error) { 645 if (m) 646 goto dontblock; 647 error = so->so_error; 648 if ((flags & MSG_PEEK) == 0) 649 so->so_error = 0; 650 goto release; 651 } 652 if (so->so_state & SS_CANTRCVMORE) { 653 if (m) 654 goto dontblock; 655 else 656 goto release; 657 } 658 for (; m; m = m->m_next) 659 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 660 m = so->so_rcv.sb_mb; 661 goto dontblock; 662 } 663 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 664 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 665 error = ENOTCONN; 666 goto release; 667 } 668 if (uio->uio_resid == 0) 669 goto release; 670 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 671 error = EWOULDBLOCK; 672 goto release; 673 } 674 sbunlock(&so->so_rcv); 675 error = sbwait(&so->so_rcv); 676 splx(s); 677 if (error) 678 return (error); 679 goto restart; 680 } 681dontblock: 682 if (uio->uio_procp) 683 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 684 nextrecord = m->m_nextpkt; 685 if (pr->pr_flags & PR_ADDR) { 686#ifdef DIAGNOSTIC 687 if (m->m_type != MT_SONAME) 688 panic("receive 1a"); 689#endif 690 orig_resid = 0; 691 if (psa) 692 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 693 mp0 == 0); 694 if (flags & MSG_PEEK) { 695 m = m->m_next; 696 } else { 697 sbfree(&so->so_rcv, m); 698 MFREE(m, so->so_rcv.sb_mb); 699 m = so->so_rcv.sb_mb; 700 } 701 } 702 while (m && m->m_type == MT_CONTROL && error == 0) { 703 if (flags & MSG_PEEK) { 704 if (controlp) 705 *controlp = m_copy(m, 0, m->m_len); 706 m = m->m_next; 707 } else { 708 sbfree(&so->so_rcv, m); 709 if (controlp) { 710 if (pr->pr_domain->dom_externalize && 711 mtod(m, struct cmsghdr *)->cmsg_type == 712 SCM_RIGHTS) 713 error = (*pr->pr_domain->dom_externalize)(m); 714 *controlp = m; 715 so->so_rcv.sb_mb = m->m_next; 716 m->m_next = 0; 717 m = so->so_rcv.sb_mb; 718 } else { 719 MFREE(m, so->so_rcv.sb_mb); 720 m = so->so_rcv.sb_mb; 721 } 722 } 723 if (controlp) { 724 orig_resid = 0; 725 controlp = &(*controlp)->m_next; 726 } 727 } 728 if (m) { 729 if ((flags & MSG_PEEK) == 0) 730 m->m_nextpkt = nextrecord; 731 type = m->m_type; 732 if (type == MT_OOBDATA) 733 flags |= MSG_OOB; 734 } 735 moff = 0; 736 offset = 0; 737 while (m && uio->uio_resid > 0 && error == 0) { 738 if (m->m_type == MT_OOBDATA) { 739 if (type != MT_OOBDATA) 740 break; 741 } else if (type == MT_OOBDATA) 742 break; 743#ifdef DIAGNOSTIC 744 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 745 panic("receive 3"); 746#endif 747 so->so_state &= ~SS_RCVATMARK; 748 len = uio->uio_resid; 749 if (so->so_oobmark && len > so->so_oobmark - offset) 750 len = so->so_oobmark - offset; 751 if (len > m->m_len - moff) 752 len = m->m_len - moff; 753 /* 754 * If mp is set, just pass back the mbufs. 755 * Otherwise copy them out via the uio, then free. 756 * Sockbuf must be consistent here (points to current mbuf, 757 * it points to next record) when we drop priority; 758 * we must note any additions to the sockbuf when we 759 * block interrupts again. 760 */ 761 if (mp == 0) { 762 splx(s); 763 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 764 s = splnet(); 765 if (error) 766 goto release; 767 } else 768 uio->uio_resid -= len; 769 if (len == m->m_len - moff) { 770 if (m->m_flags & M_EOR) 771 flags |= MSG_EOR; 772 if (flags & MSG_PEEK) { 773 m = m->m_next; 774 moff = 0; 775 } else { 776 nextrecord = m->m_nextpkt; 777 sbfree(&so->so_rcv, m); 778 if (mp) { 779 *mp = m; 780 mp = &m->m_next; 781 so->so_rcv.sb_mb = m = m->m_next; 782 *mp = (struct mbuf *)0; 783 } else { 784 MFREE(m, so->so_rcv.sb_mb); 785 m = so->so_rcv.sb_mb; 786 } 787 if (m) 788 m->m_nextpkt = nextrecord; 789 } 790 } else { 791 if (flags & MSG_PEEK) 792 moff += len; 793 else { 794 if (mp) 795 *mp = m_copym(m, 0, len, M_WAIT); 796 m->m_data += len; 797 m->m_len -= len; 798 so->so_rcv.sb_cc -= len; 799 } 800 } 801 if (so->so_oobmark) { 802 if ((flags & MSG_PEEK) == 0) { 803 so->so_oobmark -= len; 804 if (so->so_oobmark == 0) { 805 so->so_state |= SS_RCVATMARK; 806 break; 807 } 808 } else { 809 offset += len; 810 if (offset == so->so_oobmark) 811 break; 812 } 813 } 814 if (flags & MSG_EOR) 815 break; 816 /* 817 * If the MSG_WAITALL flag is set (for non-atomic socket), 818 * we must not quit until "uio->uio_resid == 0" or an error 819 * termination. If a signal/timeout occurs, return 820 * with a short count but without error. 821 * Keep sockbuf locked against other readers. 822 */ 823 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 824 !sosendallatonce(so) && !nextrecord) { 825 if (so->so_error || so->so_state & SS_CANTRCVMORE) 826 break; 827 error = sbwait(&so->so_rcv); 828 if (error) { 829 sbunlock(&so->so_rcv); 830 splx(s); 831 return (0); 832 } 833 m = so->so_rcv.sb_mb; 834 if (m) 835 nextrecord = m->m_nextpkt; 836 } 837 } 838 839 if (m && pr->pr_flags & PR_ATOMIC) { 840 flags |= MSG_TRUNC; 841 if ((flags & MSG_PEEK) == 0) 842 (void) sbdroprecord(&so->so_rcv); 843 } 844 if ((flags & MSG_PEEK) == 0) { 845 if (m == 0) 846 so->so_rcv.sb_mb = nextrecord; 847 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 848 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 849 } 850 if (orig_resid == uio->uio_resid && orig_resid && 851 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 852 sbunlock(&so->so_rcv); 853 splx(s); 854 goto restart; 855 } 856 857 if (flagsp) 858 *flagsp |= flags; 859release: 860 sbunlock(&so->so_rcv); 861 splx(s); 862 return (error); 863} 864 865int 866soshutdown(so, how) 867 register struct socket *so; 868 register int how; 869{ 870 register struct protosw *pr = so->so_proto; 871 872 how++; 873 if (how & FREAD) 874 sorflush(so); 875 if (how & FWRITE) 876 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 877 return (0); 878} 879 880void 881sorflush(so) 882 register struct socket *so; 883{ 884 register struct sockbuf *sb = &so->so_rcv; 885 register struct protosw *pr = so->so_proto; 886 register int s; 887 struct sockbuf asb; 888 889 sb->sb_flags |= SB_NOINTR; 890 (void) sblock(sb, M_WAITOK); 891 s = splimp(); 892 socantrcvmore(so); 893 sbunlock(sb); 894 asb = *sb; 895 bzero((caddr_t)sb, sizeof (*sb)); 896 splx(s); 897 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 898 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 899 sbrelease(&asb); 900} 901 902/* 903 * Perhaps this routine, and sooptcopyout(), below, ought to come in 904 * an additional variant to handle the case where the option value needs 905 * to be some kind of integer, but not a specific size. 906 * In addition to their use here, these functions are also called by the 907 * protocol-level pr_ctloutput() routines. 908 */ 909int 910sooptcopyin(sopt, buf, len, minlen) 911 struct sockopt *sopt; 912 void *buf; 913 size_t len; 914 size_t minlen; 915{ 916 size_t valsize; 917 918 /* 919 * If the user gives us more than we wanted, we ignore it, 920 * but if we don't get the minimum length the caller 921 * wants, we return EINVAL. On success, sopt->sopt_valsize 922 * is set to however much we actually retrieved. 923 */ 924 if ((valsize = sopt->sopt_valsize) < minlen) 925 return EINVAL; 926 if (valsize > len) 927 sopt->sopt_valsize = valsize = len; 928 929 if (sopt->sopt_p != 0) 930 return (copyin(sopt->sopt_val, buf, valsize)); 931 932 bcopy(sopt->sopt_val, buf, valsize); 933 return 0; 934} 935 936int 937sosetopt(so, sopt) 938 struct socket *so; 939 struct sockopt *sopt; 940{ 941 int error, optval; 942 struct linger l; 943 struct timeval tv; 944 short val; 945 946 error = 0; 947 if (sopt->sopt_level != SOL_SOCKET) { 948 if (so->so_proto && so->so_proto->pr_ctloutput) 949 return ((*so->so_proto->pr_ctloutput) 950 (so, sopt)); 951 error = ENOPROTOOPT; 952 } else { 953 switch (sopt->sopt_name) { 954 case SO_LINGER: 955 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 956 if (error) 957 goto bad; 958 959 so->so_linger = l.l_linger; 960 if (l.l_onoff) 961 so->so_options |= SO_LINGER; 962 else 963 so->so_options &= ~SO_LINGER; 964 break; 965 966 case SO_DEBUG: 967 case SO_KEEPALIVE: 968 case SO_DONTROUTE: 969 case SO_USELOOPBACK: 970 case SO_BROADCAST: 971 case SO_REUSEADDR: 972 case SO_REUSEPORT: 973 case SO_OOBINLINE: 974 case SO_TIMESTAMP: 975 error = sooptcopyin(sopt, &optval, sizeof optval, 976 sizeof optval); 977 if (error) 978 goto bad; 979 if (optval) 980 so->so_options |= sopt->sopt_name; 981 else 982 so->so_options &= ~sopt->sopt_name; 983 break; 984 985 case SO_SNDBUF: 986 case SO_RCVBUF: 987 case SO_SNDLOWAT: 988 case SO_RCVLOWAT: 989 error = sooptcopyin(sopt, &optval, sizeof optval, 990 sizeof optval); 991 if (error) 992 goto bad; 993 994 /* 995 * Values < 1 make no sense for any of these 996 * options, so disallow them. 997 */ 998 if (optval < 1) { 999 error = EINVAL; 1000 goto bad; 1001 } 1002 1003 switch (sopt->sopt_name) { 1004 case SO_SNDBUF: 1005 case SO_RCVBUF: 1006 if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 1007 &so->so_snd : &so->so_rcv, 1008 (u_long) optval) == 0) { 1009 error = ENOBUFS; 1010 goto bad; 1011 } 1012 break; 1013 1014 /* 1015 * Make sure the low-water is never greater than 1016 * the high-water. 1017 */ 1018 case SO_SNDLOWAT: 1019 so->so_snd.sb_lowat = 1020 (optval > so->so_snd.sb_hiwat) ? 1021 so->so_snd.sb_hiwat : optval; 1022 break; 1023 case SO_RCVLOWAT: 1024 so->so_rcv.sb_lowat = 1025 (optval > so->so_rcv.sb_hiwat) ? 1026 so->so_rcv.sb_hiwat : optval; 1027 break; 1028 } 1029 break; 1030 1031 case SO_SNDTIMEO: 1032 case SO_RCVTIMEO: 1033 error = sooptcopyin(sopt, &tv, sizeof tv, 1034 sizeof tv); 1035 if (error) 1036 goto bad; 1037 1038 if (tv.tv_sec > SHRT_MAX / hz - hz) { 1039 error = EDOM; 1040 goto bad; 1041 } 1042 val = tv.tv_sec * hz + tv.tv_usec / tick; 1043 1044 switch (sopt->sopt_name) { 1045 case SO_SNDTIMEO: 1046 so->so_snd.sb_timeo = val; 1047 break; 1048 case SO_RCVTIMEO: 1049 so->so_rcv.sb_timeo = val; 1050 break; 1051 } 1052 break; 1053 1054 default: 1055 error = ENOPROTOOPT; 1056 break; 1057 } 1058 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1059 (void) ((*so->so_proto->pr_ctloutput) 1060 (so, sopt)); 1061 } 1062 } 1063bad: 1064 return (error); 1065} 1066 1067/* Helper routine for getsockopt */ 1068int 1069sooptcopyout(sopt, buf, len) 1070 struct sockopt *sopt; 1071 void *buf; 1072 size_t len; 1073{ 1074 int error; 1075 size_t valsize; 1076 1077 error = 0; 1078 1079 /* 1080 * Documented get behavior is that we always return a value, 1081 * possibly truncated to fit in the user's buffer. 1082 * Traditional behavior is that we always tell the user 1083 * precisely how much we copied, rather than something useful 1084 * like the total amount we had available for her. 1085 * Note that this interface is not idempotent; the entire answer must 1086 * generated ahead of time. 1087 */ 1088 valsize = min(len, sopt->sopt_valsize); 1089 sopt->sopt_valsize = valsize; 1090 if (sopt->sopt_val != 0) { 1091 if (sopt->sopt_p != 0) 1092 error = copyout(buf, sopt->sopt_val, valsize); 1093 else 1094 bcopy(buf, sopt->sopt_val, valsize); 1095 } 1096 return error; 1097} 1098 1099int 1100sogetopt(so, sopt) 1101 struct socket *so; 1102 struct sockopt *sopt; 1103{ 1104 int error, optval; 1105 struct linger l; 1106 struct timeval tv; 1107 1108 error = 0; 1109 if (sopt->sopt_level != SOL_SOCKET) { 1110 if (so->so_proto && so->so_proto->pr_ctloutput) { 1111 return ((*so->so_proto->pr_ctloutput) 1112 (so, sopt)); 1113 } else 1114 return (ENOPROTOOPT); 1115 } else { 1116 switch (sopt->sopt_name) { 1117 case SO_LINGER: 1118 l.l_onoff = so->so_options & SO_LINGER; 1119 l.l_linger = so->so_linger; 1120 error = sooptcopyout(sopt, &l, sizeof l); 1121 break; 1122 1123 case SO_USELOOPBACK: 1124 case SO_DONTROUTE: 1125 case SO_DEBUG: 1126 case SO_KEEPALIVE: 1127 case SO_REUSEADDR: 1128 case SO_REUSEPORT: 1129 case SO_BROADCAST: 1130 case SO_OOBINLINE: 1131 case SO_TIMESTAMP: 1132 optval = so->so_options & sopt->sopt_name; 1133integer: 1134 error = sooptcopyout(sopt, &optval, sizeof optval); 1135 break; 1136 1137 case SO_TYPE: 1138 optval = so->so_type; 1139 goto integer; 1140 1141 case SO_ERROR: 1142 optval = so->so_error; 1143 so->so_error = 0; 1144 goto integer; 1145 1146 case SO_SNDBUF: 1147 optval = so->so_snd.sb_hiwat; 1148 goto integer; 1149 1150 case SO_RCVBUF: 1151 optval = so->so_rcv.sb_hiwat; 1152 goto integer; 1153 1154 case SO_SNDLOWAT: 1155 optval = so->so_snd.sb_lowat; 1156 goto integer; 1157 1158 case SO_RCVLOWAT: 1159 optval = so->so_rcv.sb_lowat; 1160 goto integer; 1161 1162 case SO_SNDTIMEO: 1163 case SO_RCVTIMEO: 1164 optval = (sopt->sopt_name == SO_SNDTIMEO ? 1165 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1166 1167 tv.tv_sec = optval / hz; 1168 tv.tv_usec = (optval % hz) * tick; 1169 error = sooptcopyout(sopt, &tv, sizeof tv); 1170 break; 1171 1172 default: 1173 error = ENOPROTOOPT; 1174 break; 1175 } 1176 return (error); 1177 } 1178} 1179 1180void 1181sohasoutofband(so) 1182 register struct socket *so; 1183{ 1184 struct proc *p; 1185 1186 if (so->so_sigio != NULL) 1187 pgsigio(so->so_sigio, SIGURG, 0); 1188 selwakeup(&so->so_rcv.sb_sel); 1189} 1190 1191int 1192sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1193{ 1194 int revents = 0; 1195 int s = splnet(); 1196 1197 if (events & (POLLIN | POLLRDNORM)) 1198 if (soreadable(so)) 1199 revents |= events & (POLLIN | POLLRDNORM); 1200 1201 if (events & (POLLOUT | POLLWRNORM)) 1202 if (sowriteable(so)) 1203 revents |= events & (POLLOUT | POLLWRNORM); 1204 1205 if (events & (POLLPRI | POLLRDBAND)) 1206 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1207 revents |= events & (POLLPRI | POLLRDBAND); 1208 1209 if (revents == 0) { 1210 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1211 selrecord(p, &so->so_rcv.sb_sel); 1212 so->so_rcv.sb_flags |= SB_SEL; 1213 } 1214 1215 if (events & (POLLOUT | POLLWRNORM)) { 1216 selrecord(p, &so->so_snd.sb_sel); 1217 so->so_snd.sb_flags |= SB_SEL; 1218 } 1219 } 1220 1221 splx(s); 1222 return (revents); 1223} 1224