uipc_socket.c revision 30354
17011SN/A/* 212325Spsandoz * Copyright (c) 1982, 1986, 1988, 1990, 1993 37011SN/A * The Regents of the University of California. All rights reserved. 47011SN/A * 57011SN/A * Redistribution and use in source and binary forms, with or without 67011SN/A * modification, are permitted provided that the following conditions 77011SN/A * are met: 87011SN/A * 1. Redistributions of source code must retain the above copyright 97011SN/A * notice, this list of conditions and the following disclaimer. 107011SN/A * 2. Redistributions in binary form must reproduce the above copyright 117011SN/A * notice, this list of conditions and the following disclaimer in the 127011SN/A * documentation and/or other materials provided with the distribution. 137011SN/A * 3. All advertising materials mentioning features or use of this software 147011SN/A * must display the following acknowledgement: 157011SN/A * This product includes software developed by the University of 167011SN/A * California, Berkeley and its contributors. 177011SN/A * 4. Neither the name of the University nor the names of its contributors 187011SN/A * may be used to endorse or promote products derived from this software 197011SN/A * without specific prior written permission. 207011SN/A * 217011SN/A * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 227011SN/A * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 237011SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 247011SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 257011SN/A * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 267011SN/A * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 277011SN/A * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 287011SN/A * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 297011SN/A * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 307011SN/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 318051SN/A * SUCH DAMAGE. 327011SN/A * 337011SN/A * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 347011SN/A * $Id: uipc_socket.c,v 1.32 1997/10/04 18:21:15 phk Exp $ 357011SN/A */ 367011SN/A 377011SN/A#include <sys/param.h> 387011SN/A#include <sys/systm.h> 397011SN/A#include <sys/proc.h> 407011SN/A#include <sys/fcntl.h> 417011SN/A#include <sys/malloc.h> 427011SN/A#include <sys/mbuf.h> 437011SN/A#include <sys/domain.h> 447011SN/A#include <sys/kernel.h> 457011SN/A#include <sys/poll.h> 467011SN/A#include <sys/protosw.h> 477570SN/A#include <sys/socket.h> 487011SN/A#include <sys/socketvar.h> 497011SN/A#include <sys/resourcevar.h> 507011SN/A#include <sys/signalvar.h> 517011SN/A#include <sys/sysctl.h> 527011SN/A 537011SN/A#include <machine/limits.h> 547011SN/A 557011SN/AMALLOC_DEFINE(M_SOCKET, "socket", "socket structure"); 567011SN/AMALLOC_DEFINE(M_SONAME, "soname", "socket name"); 577011SN/AMALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 587570SN/A 597570SN/Astatic int somaxconn = SOMAXCONN; 607570SN/ASYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 617570SN/A 0, ""); 627570SN/A 637570SN/A/* 647570SN/A * Socket operation routines. 657011SN/A * These routines are called by the routines in 667011SN/A * sys_socket.c or from a system process, and 677011SN/A * implement the semantics of socket operations by 687011SN/A * switching out to the protocol specific routines. 697011SN/A */ 707011SN/A/*ARGSUSED*/ 717011SN/Aint 727011SN/Asocreate(dom, aso, type, proto, p) 737011SN/A int dom; 747011SN/A struct socket **aso; 757011SN/A register int type; 767011SN/A int proto; 7710298SN/A struct proc *p; 787011SN/A{ 797011SN/A register struct protosw *prp; 807011SN/A register struct socket *so; 817011SN/A register int error; 827011SN/A 837011SN/A if (proto) 847011SN/A prp = pffindproto(dom, proto, type); 857011SN/A else 867011SN/A prp = pffindtype(dom, type); 877011SN/A if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 887011SN/A return (EPROTONOSUPPORT); 897011SN/A if (prp->pr_type != type) 907011SN/A return (EPROTOTYPE); 917011SN/A MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); 927011SN/A bzero((caddr_t)so, sizeof(*so)); 937011SN/A TAILQ_INIT(&so->so_incomp); 947011SN/A TAILQ_INIT(&so->so_comp); 957011SN/A so->so_type = type; 967011SN/A so->so_proto = prp; 977011SN/A error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 987011SN/A if (error) { 997011SN/A so->so_state |= SS_NOFDREF; 1007011SN/A sofree(so); 10113916Samlu return (error); 1027011SN/A } 1037011SN/A *aso = so; 1047011SN/A return (0); 1057011SN/A} 1067011SN/A 1077011SN/Aint 1087011SN/Asobind(so, nam, p) 1097011SN/A struct socket *so; 1107011SN/A struct sockaddr *nam; 1117011SN/A struct proc *p; 1127011SN/A{ 1137011SN/A int s = splnet(); 1147011SN/A int error; 1157011SN/A 1167011SN/A error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 1177011SN/A splx(s); 1187011SN/A return (error); 1197011SN/A} 1207011SN/A 1217011SN/Aint 1227011SN/Asolisten(so, backlog, p) 1237011SN/A register struct socket *so; 1247011SN/A int backlog; 1257011SN/A struct proc *p; 1267011SN/A{ 1277011SN/A int s = splnet(), error; 1287011SN/A 1297011SN/A error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 1307011SN/A if (error) { 1317011SN/A splx(s); 1327011SN/A return (error); 1337011SN/A } 1347011SN/A if (so->so_comp.tqh_first == NULL) 1357011SN/A so->so_options |= SO_ACCEPTCONN; 1367011SN/A if (backlog < 0 || backlog > somaxconn) 1377011SN/A backlog = somaxconn; 1387011SN/A so->so_qlimit = backlog; 13912325Spsandoz splx(s); 1407011SN/A return (0); 14112325Spsandoz} 1427011SN/A 1437011SN/Avoid 1447011SN/Asofree(so) 1457011SN/A register struct socket *so; 1467011SN/A{ 1477011SN/A struct socket *head = so->so_head; 1487011SN/A 1497011SN/A if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 1507011SN/A return; 1517011SN/A if (head != NULL) { 1527011SN/A if (so->so_state & SS_INCOMP) { 1537011SN/A TAILQ_REMOVE(&head->so_incomp, so, so_list); 1547011SN/A head->so_incqlen--; 1557011SN/A } else if (so->so_state & SS_COMP) { 1567011SN/A TAILQ_REMOVE(&head->so_comp, so, so_list); 1577570SN/A } else { 1587570SN/A panic("sofree: not queued"); 1597570SN/A } 1607570SN/A head->so_qlen--; 1617570SN/A so->so_state &= ~(SS_INCOMP|SS_COMP); 1627570SN/A so->so_head = NULL; 1637570SN/A } 1647570SN/A sbrelease(&so->so_snd); 1657011SN/A sorflush(so); 1667011SN/A FREE(so, M_SOCKET); 1677011SN/A} 1687011SN/A 1697011SN/A/* 1707011SN/A * Close a socket on last file table reference removal. 1717011SN/A * Initiate disconnect if connected. 1727011SN/A * Free socket when disconnect complete. 1737011SN/A */ 1747011SN/Aint 1757011SN/Asoclose(so) 1767011SN/A register struct socket *so; 1777011SN/A{ 1787011SN/A int s = splnet(); /* conservative */ 1797011SN/A int error = 0; 1807011SN/A 1817011SN/A if (so->so_options & SO_ACCEPTCONN) { 1827011SN/A struct socket *sp, *sonext; 1837011SN/A 1847011SN/A for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { 1857011SN/A sonext = sp->so_list.tqe_next; 1867011SN/A (void) soabort(sp); 1877011SN/A } 1887011SN/A for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { 1897011SN/A sonext = sp->so_list.tqe_next; 1907011SN/A (void) soabort(sp); 1917011SN/A } 1927011SN/A } 1937011SN/A if (so->so_pcb == 0) 1947011SN/A goto discard; 1957011SN/A if (so->so_state & SS_ISCONNECTED) { 1967011SN/A if ((so->so_state & SS_ISDISCONNECTING) == 0) { 1977011SN/A error = sodisconnect(so); 1987011SN/A if (error) 1997011SN/A goto drop; 2007011SN/A } 2017011SN/A if (so->so_options & SO_LINGER) { 2027011SN/A if ((so->so_state & SS_ISDISCONNECTING) && 2037011SN/A (so->so_state & SS_NBIO)) 2047011SN/A goto drop; 205 while (so->so_state & SS_ISCONNECTED) { 206 error = tsleep((caddr_t)&so->so_timeo, 207 PSOCK | PCATCH, "soclos", so->so_linger); 208 if (error) 209 break; 210 } 211 } 212 } 213drop: 214 if (so->so_pcb) { 215 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 216 if (error == 0) 217 error = error2; 218 } 219discard: 220 if (so->so_state & SS_NOFDREF) 221 panic("soclose: NOFDREF"); 222 so->so_state |= SS_NOFDREF; 223 sofree(so); 224 splx(s); 225 return (error); 226} 227 228/* 229 * Must be called at splnet... 230 */ 231int 232soabort(so) 233 struct socket *so; 234{ 235 236 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 237} 238 239int 240soaccept(so, nam) 241 register struct socket *so; 242 struct sockaddr **nam; 243{ 244 int s = splnet(); 245 int error; 246 247 if ((so->so_state & SS_NOFDREF) == 0) 248 panic("soaccept: !NOFDREF"); 249 so->so_state &= ~SS_NOFDREF; 250 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 251 splx(s); 252 return (error); 253} 254 255int 256soconnect(so, nam, p) 257 register struct socket *so; 258 struct sockaddr *nam; 259 struct proc *p; 260{ 261 int s; 262 int error; 263 264 if (so->so_options & SO_ACCEPTCONN) 265 return (EOPNOTSUPP); 266 s = splnet(); 267 /* 268 * If protocol is connection-based, can only connect once. 269 * Otherwise, if connected, try to disconnect first. 270 * This allows user to disconnect by connecting to, e.g., 271 * a null address. 272 */ 273 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 274 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 275 (error = sodisconnect(so)))) 276 error = EISCONN; 277 else 278 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 279 splx(s); 280 return (error); 281} 282 283int 284soconnect2(so1, so2) 285 register struct socket *so1; 286 struct socket *so2; 287{ 288 int s = splnet(); 289 int error; 290 291 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 292 splx(s); 293 return (error); 294} 295 296int 297sodisconnect(so) 298 register struct socket *so; 299{ 300 int s = splnet(); 301 int error; 302 303 if ((so->so_state & SS_ISCONNECTED) == 0) { 304 error = ENOTCONN; 305 goto bad; 306 } 307 if (so->so_state & SS_ISDISCONNECTING) { 308 error = EALREADY; 309 goto bad; 310 } 311 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 312bad: 313 splx(s); 314 return (error); 315} 316 317#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 318/* 319 * Send on a socket. 320 * If send must go all at once and message is larger than 321 * send buffering, then hard error. 322 * Lock against other senders. 323 * If must go all at once and not enough room now, then 324 * inform user that this would block and do nothing. 325 * Otherwise, if nonblocking, send as much as possible. 326 * The data to be sent is described by "uio" if nonzero, 327 * otherwise by the mbuf chain "top" (which must be null 328 * if uio is not). Data provided in mbuf chain must be small 329 * enough to send all at once. 330 * 331 * Returns nonzero on error, timeout or signal; callers 332 * must check for short counts if EINTR/ERESTART are returned. 333 * Data and control buffers are freed on return. 334 */ 335int 336sosend(so, addr, uio, top, control, flags, p) 337 register struct socket *so; 338 struct sockaddr *addr; 339 struct uio *uio; 340 struct mbuf *top; 341 struct mbuf *control; 342 int flags; 343 struct proc *p; 344{ 345 struct mbuf **mp; 346 register struct mbuf *m; 347 register long space, len, resid; 348 int clen = 0, error, s, dontroute, mlen; 349 int atomic = sosendallatonce(so) || top; 350 351 if (uio) 352 resid = uio->uio_resid; 353 else 354 resid = top->m_pkthdr.len; 355 /* 356 * In theory resid should be unsigned. 357 * However, space must be signed, as it might be less than 0 358 * if we over-committed, and we must use a signed comparison 359 * of space and resid. On the other hand, a negative resid 360 * causes us to loop sending 0-length segments to the protocol. 361 */ 362 if (resid < 0) 363 return (EINVAL); 364 dontroute = 365 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 366 (so->so_proto->pr_flags & PR_ATOMIC); 367 if (p) 368 p->p_stats->p_ru.ru_msgsnd++; 369 if (control) 370 clen = control->m_len; 371#define snderr(errno) { error = errno; splx(s); goto release; } 372 373restart: 374 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 375 if (error) 376 goto out; 377 do { 378 s = splnet(); 379 if (so->so_state & SS_CANTSENDMORE) 380 snderr(EPIPE); 381 if (so->so_error) 382 snderr(so->so_error); 383 if ((so->so_state & SS_ISCONNECTED) == 0) { 384 /* 385 * `sendto' and `sendmsg' is allowed on a connection- 386 * based socket if it supports implied connect. 387 * Return ENOTCONN if not connected and no address is 388 * supplied. 389 */ 390 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 391 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 392 if ((so->so_state & SS_ISCONFIRMING) == 0 && 393 !(resid == 0 && clen != 0)) 394 snderr(ENOTCONN); 395 } else if (addr == 0) 396 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 397 ENOTCONN : EDESTADDRREQ); 398 } 399 space = sbspace(&so->so_snd); 400 if (flags & MSG_OOB) 401 space += 1024; 402 if ((atomic && resid > so->so_snd.sb_hiwat) || 403 clen > so->so_snd.sb_hiwat) 404 snderr(EMSGSIZE); 405 if (space < resid + clen && uio && 406 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 407 if (so->so_state & SS_NBIO) 408 snderr(EWOULDBLOCK); 409 sbunlock(&so->so_snd); 410 error = sbwait(&so->so_snd); 411 splx(s); 412 if (error) 413 goto out; 414 goto restart; 415 } 416 splx(s); 417 mp = ⊤ 418 space -= clen; 419 do { 420 if (uio == NULL) { 421 /* 422 * Data is prepackaged in "top". 423 */ 424 resid = 0; 425 if (flags & MSG_EOR) 426 top->m_flags |= M_EOR; 427 } else do { 428 if (top == 0) { 429 MGETHDR(m, M_WAIT, MT_DATA); 430 mlen = MHLEN; 431 m->m_pkthdr.len = 0; 432 m->m_pkthdr.rcvif = (struct ifnet *)0; 433 } else { 434 MGET(m, M_WAIT, MT_DATA); 435 mlen = MLEN; 436 } 437 if (resid >= MINCLSIZE) { 438 MCLGET(m, M_WAIT); 439 if ((m->m_flags & M_EXT) == 0) 440 goto nopages; 441 mlen = MCLBYTES; 442 len = min(min(mlen, resid), space); 443 } else { 444nopages: 445 len = min(min(mlen, resid), space); 446 /* 447 * For datagram protocols, leave room 448 * for protocol headers in first mbuf. 449 */ 450 if (atomic && top == 0 && len < mlen) 451 MH_ALIGN(m, len); 452 } 453 space -= len; 454 error = uiomove(mtod(m, caddr_t), (int)len, uio); 455 resid = uio->uio_resid; 456 m->m_len = len; 457 *mp = m; 458 top->m_pkthdr.len += len; 459 if (error) 460 goto release; 461 mp = &m->m_next; 462 if (resid <= 0) { 463 if (flags & MSG_EOR) 464 top->m_flags |= M_EOR; 465 break; 466 } 467 } while (space > 0 && atomic); 468 if (dontroute) 469 so->so_options |= SO_DONTROUTE; 470 s = splnet(); /* XXX */ 471 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 472 (flags & MSG_OOB) ? PRUS_OOB : 473 /* 474 * If the user set MSG_EOF, the protocol 475 * understands this flag and nothing left to 476 * send then use PRU_SEND_EOF instead of PRU_SEND. 477 */ 478 ((flags & MSG_EOF) && 479 (so->so_proto->pr_flags & PR_IMPLOPCL) && 480 (resid <= 0)) ? 481 PRUS_EOF : 0, 482 top, addr, control, p); 483 splx(s); 484 if (dontroute) 485 so->so_options &= ~SO_DONTROUTE; 486 clen = 0; 487 control = 0; 488 top = 0; 489 mp = ⊤ 490 if (error) 491 goto release; 492 } while (resid && space > 0); 493 } while (resid); 494 495release: 496 sbunlock(&so->so_snd); 497out: 498 if (top) 499 m_freem(top); 500 if (control) 501 m_freem(control); 502 return (error); 503} 504 505/* 506 * Implement receive operations on a socket. 507 * We depend on the way that records are added to the sockbuf 508 * by sbappend*. In particular, each record (mbufs linked through m_next) 509 * must begin with an address if the protocol so specifies, 510 * followed by an optional mbuf or mbufs containing ancillary data, 511 * and then zero or more mbufs of data. 512 * In order to avoid blocking network interrupts for the entire time here, 513 * we splx() while doing the actual copy to user space. 514 * Although the sockbuf is locked, new data may still be appended, 515 * and thus we must maintain consistency of the sockbuf during that time. 516 * 517 * The caller may receive the data as a single mbuf chain by supplying 518 * an mbuf **mp0 for use in returning the chain. The uio is then used 519 * only for the count in uio_resid. 520 */ 521int 522soreceive(so, psa, uio, mp0, controlp, flagsp) 523 register struct socket *so; 524 struct sockaddr **psa; 525 struct uio *uio; 526 struct mbuf **mp0; 527 struct mbuf **controlp; 528 int *flagsp; 529{ 530 register struct mbuf *m, **mp; 531 register int flags, len, error, s, offset; 532 struct protosw *pr = so->so_proto; 533 struct mbuf *nextrecord; 534 int moff, type = 0; 535 int orig_resid = uio->uio_resid; 536 537 mp = mp0; 538 if (psa) 539 *psa = 0; 540 if (controlp) 541 *controlp = 0; 542 if (flagsp) 543 flags = *flagsp &~ MSG_EOR; 544 else 545 flags = 0; 546 if (flags & MSG_OOB) { 547 m = m_get(M_WAIT, MT_DATA); 548 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 549 if (error) 550 goto bad; 551 do { 552 error = uiomove(mtod(m, caddr_t), 553 (int) min(uio->uio_resid, m->m_len), uio); 554 m = m_free(m); 555 } while (uio->uio_resid && error == 0 && m); 556bad: 557 if (m) 558 m_freem(m); 559 return (error); 560 } 561 if (mp) 562 *mp = (struct mbuf *)0; 563 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 564 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 565 566restart: 567 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 568 if (error) 569 return (error); 570 s = splnet(); 571 572 m = so->so_rcv.sb_mb; 573 /* 574 * If we have less data than requested, block awaiting more 575 * (subject to any timeout) if: 576 * 1. the current count is less than the low water mark, or 577 * 2. MSG_WAITALL is set, and it is possible to do the entire 578 * receive operation at once if we block (resid <= hiwat). 579 * 3. MSG_DONTWAIT is not set 580 * If MSG_WAITALL is set but resid is larger than the receive buffer, 581 * we have to do the receive in sections, and thus risk returning 582 * a short count if a timeout or signal occurs after we start. 583 */ 584 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 585 so->so_rcv.sb_cc < uio->uio_resid) && 586 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 587 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 588 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 589#ifdef DIAGNOSTIC 590 if (m == 0 && so->so_rcv.sb_cc) 591 panic("receive 1"); 592#endif 593 if (so->so_error) { 594 if (m) 595 goto dontblock; 596 error = so->so_error; 597 if ((flags & MSG_PEEK) == 0) 598 so->so_error = 0; 599 goto release; 600 } 601 if (so->so_state & SS_CANTRCVMORE) { 602 if (m) 603 goto dontblock; 604 else 605 goto release; 606 } 607 for (; m; m = m->m_next) 608 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 609 m = so->so_rcv.sb_mb; 610 goto dontblock; 611 } 612 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 613 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 614 error = ENOTCONN; 615 goto release; 616 } 617 if (uio->uio_resid == 0) 618 goto release; 619 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 620 error = EWOULDBLOCK; 621 goto release; 622 } 623 sbunlock(&so->so_rcv); 624 error = sbwait(&so->so_rcv); 625 splx(s); 626 if (error) 627 return (error); 628 goto restart; 629 } 630dontblock: 631 if (uio->uio_procp) 632 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 633 nextrecord = m->m_nextpkt; 634 if (pr->pr_flags & PR_ADDR) { 635#ifdef DIAGNOSTIC 636 if (m->m_type != MT_SONAME) 637 panic("receive 1a"); 638#endif 639 orig_resid = 0; 640 if (psa) 641 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 642 mp0 == 0); 643 if (flags & MSG_PEEK) { 644 m = m->m_next; 645 } else { 646 sbfree(&so->so_rcv, m); 647 MFREE(m, so->so_rcv.sb_mb); 648 m = so->so_rcv.sb_mb; 649 } 650 } 651 while (m && m->m_type == MT_CONTROL && error == 0) { 652 if (flags & MSG_PEEK) { 653 if (controlp) 654 *controlp = m_copy(m, 0, m->m_len); 655 m = m->m_next; 656 } else { 657 sbfree(&so->so_rcv, m); 658 if (controlp) { 659 if (pr->pr_domain->dom_externalize && 660 mtod(m, struct cmsghdr *)->cmsg_type == 661 SCM_RIGHTS) 662 error = (*pr->pr_domain->dom_externalize)(m); 663 *controlp = m; 664 so->so_rcv.sb_mb = m->m_next; 665 m->m_next = 0; 666 m = so->so_rcv.sb_mb; 667 } else { 668 MFREE(m, so->so_rcv.sb_mb); 669 m = so->so_rcv.sb_mb; 670 } 671 } 672 if (controlp) { 673 orig_resid = 0; 674 controlp = &(*controlp)->m_next; 675 } 676 } 677 if (m) { 678 if ((flags & MSG_PEEK) == 0) 679 m->m_nextpkt = nextrecord; 680 type = m->m_type; 681 if (type == MT_OOBDATA) 682 flags |= MSG_OOB; 683 } 684 moff = 0; 685 offset = 0; 686 while (m && uio->uio_resid > 0 && error == 0) { 687 if (m->m_type == MT_OOBDATA) { 688 if (type != MT_OOBDATA) 689 break; 690 } else if (type == MT_OOBDATA) 691 break; 692#ifdef DIAGNOSTIC 693 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 694 panic("receive 3"); 695#endif 696 so->so_state &= ~SS_RCVATMARK; 697 len = uio->uio_resid; 698 if (so->so_oobmark && len > so->so_oobmark - offset) 699 len = so->so_oobmark - offset; 700 if (len > m->m_len - moff) 701 len = m->m_len - moff; 702 /* 703 * If mp is set, just pass back the mbufs. 704 * Otherwise copy them out via the uio, then free. 705 * Sockbuf must be consistent here (points to current mbuf, 706 * it points to next record) when we drop priority; 707 * we must note any additions to the sockbuf when we 708 * block interrupts again. 709 */ 710 if (mp == 0) { 711 splx(s); 712 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 713 s = splnet(); 714 if (error) 715 goto release; 716 } else 717 uio->uio_resid -= len; 718 if (len == m->m_len - moff) { 719 if (m->m_flags & M_EOR) 720 flags |= MSG_EOR; 721 if (flags & MSG_PEEK) { 722 m = m->m_next; 723 moff = 0; 724 } else { 725 nextrecord = m->m_nextpkt; 726 sbfree(&so->so_rcv, m); 727 if (mp) { 728 *mp = m; 729 mp = &m->m_next; 730 so->so_rcv.sb_mb = m = m->m_next; 731 *mp = (struct mbuf *)0; 732 } else { 733 MFREE(m, so->so_rcv.sb_mb); 734 m = so->so_rcv.sb_mb; 735 } 736 if (m) 737 m->m_nextpkt = nextrecord; 738 } 739 } else { 740 if (flags & MSG_PEEK) 741 moff += len; 742 else { 743 if (mp) 744 *mp = m_copym(m, 0, len, M_WAIT); 745 m->m_data += len; 746 m->m_len -= len; 747 so->so_rcv.sb_cc -= len; 748 } 749 } 750 if (so->so_oobmark) { 751 if ((flags & MSG_PEEK) == 0) { 752 so->so_oobmark -= len; 753 if (so->so_oobmark == 0) { 754 so->so_state |= SS_RCVATMARK; 755 break; 756 } 757 } else { 758 offset += len; 759 if (offset == so->so_oobmark) 760 break; 761 } 762 } 763 if (flags & MSG_EOR) 764 break; 765 /* 766 * If the MSG_WAITALL flag is set (for non-atomic socket), 767 * we must not quit until "uio->uio_resid == 0" or an error 768 * termination. If a signal/timeout occurs, return 769 * with a short count but without error. 770 * Keep sockbuf locked against other readers. 771 */ 772 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 773 !sosendallatonce(so) && !nextrecord) { 774 if (so->so_error || so->so_state & SS_CANTRCVMORE) 775 break; 776 error = sbwait(&so->so_rcv); 777 if (error) { 778 sbunlock(&so->so_rcv); 779 splx(s); 780 return (0); 781 } 782 m = so->so_rcv.sb_mb; 783 if (m) 784 nextrecord = m->m_nextpkt; 785 } 786 } 787 788 if (m && pr->pr_flags & PR_ATOMIC) { 789 flags |= MSG_TRUNC; 790 if ((flags & MSG_PEEK) == 0) 791 (void) sbdroprecord(&so->so_rcv); 792 } 793 if ((flags & MSG_PEEK) == 0) { 794 if (m == 0) 795 so->so_rcv.sb_mb = nextrecord; 796 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 797 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 798 } 799 if (orig_resid == uio->uio_resid && orig_resid && 800 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 801 sbunlock(&so->so_rcv); 802 splx(s); 803 goto restart; 804 } 805 806 if (flagsp) 807 *flagsp |= flags; 808release: 809 sbunlock(&so->so_rcv); 810 splx(s); 811 return (error); 812} 813 814int 815soshutdown(so, how) 816 register struct socket *so; 817 register int how; 818{ 819 register struct protosw *pr = so->so_proto; 820 821 how++; 822 if (how & FREAD) 823 sorflush(so); 824 if (how & FWRITE) 825 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 826 return (0); 827} 828 829void 830sorflush(so) 831 register struct socket *so; 832{ 833 register struct sockbuf *sb = &so->so_rcv; 834 register struct protosw *pr = so->so_proto; 835 register int s; 836 struct sockbuf asb; 837 838 sb->sb_flags |= SB_NOINTR; 839 (void) sblock(sb, M_WAITOK); 840 s = splimp(); 841 socantrcvmore(so); 842 sbunlock(sb); 843 asb = *sb; 844 bzero((caddr_t)sb, sizeof (*sb)); 845 splx(s); 846 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 847 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 848 sbrelease(&asb); 849} 850 851int 852sosetopt(so, level, optname, m0, p) 853 register struct socket *so; 854 int level, optname; 855 struct mbuf *m0; 856 struct proc *p; 857{ 858 int error = 0; 859 register struct mbuf *m = m0; 860 861 if (level != SOL_SOCKET) { 862 if (so->so_proto && so->so_proto->pr_ctloutput) 863 return ((*so->so_proto->pr_ctloutput) 864 (PRCO_SETOPT, so, level, optname, &m0, p)); 865 error = ENOPROTOOPT; 866 } else { 867 switch (optname) { 868 869 case SO_LINGER: 870 if (m == NULL || m->m_len != sizeof (struct linger)) { 871 error = EINVAL; 872 goto bad; 873 } 874 so->so_linger = mtod(m, struct linger *)->l_linger; 875 /* fall thru... */ 876 877 case SO_DEBUG: 878 case SO_KEEPALIVE: 879 case SO_DONTROUTE: 880 case SO_USELOOPBACK: 881 case SO_BROADCAST: 882 case SO_REUSEADDR: 883 case SO_REUSEPORT: 884 case SO_OOBINLINE: 885 case SO_TIMESTAMP: 886 if (m == NULL || m->m_len < sizeof (int)) { 887 error = EINVAL; 888 goto bad; 889 } 890 if (*mtod(m, int *)) 891 so->so_options |= optname; 892 else 893 so->so_options &= ~optname; 894 break; 895 896 case SO_SNDBUF: 897 case SO_RCVBUF: 898 case SO_SNDLOWAT: 899 case SO_RCVLOWAT: 900 { 901 int optval; 902 903 if (m == NULL || m->m_len < sizeof (int)) { 904 error = EINVAL; 905 goto bad; 906 } 907 908 /* 909 * Values < 1 make no sense for any of these 910 * options, so disallow them. 911 */ 912 optval = *mtod(m, int *); 913 if (optval < 1) { 914 error = EINVAL; 915 goto bad; 916 } 917 918 switch (optname) { 919 920 case SO_SNDBUF: 921 case SO_RCVBUF: 922 if (sbreserve(optname == SO_SNDBUF ? 923 &so->so_snd : &so->so_rcv, 924 (u_long) optval) == 0) { 925 error = ENOBUFS; 926 goto bad; 927 } 928 break; 929 930 /* 931 * Make sure the low-water is never greater than 932 * the high-water. 933 */ 934 case SO_SNDLOWAT: 935 so->so_snd.sb_lowat = 936 (optval > so->so_snd.sb_hiwat) ? 937 so->so_snd.sb_hiwat : optval; 938 break; 939 case SO_RCVLOWAT: 940 so->so_rcv.sb_lowat = 941 (optval > so->so_rcv.sb_hiwat) ? 942 so->so_rcv.sb_hiwat : optval; 943 break; 944 } 945 break; 946 } 947 948 case SO_SNDTIMEO: 949 case SO_RCVTIMEO: 950 { 951 struct timeval *tv; 952 short val; 953 954 if (m == NULL || m->m_len < sizeof (*tv)) { 955 error = EINVAL; 956 goto bad; 957 } 958 tv = mtod(m, struct timeval *); 959 if (tv->tv_sec > SHRT_MAX / hz - hz) { 960 error = EDOM; 961 goto bad; 962 } 963 val = tv->tv_sec * hz + tv->tv_usec / tick; 964 965 switch (optname) { 966 967 case SO_SNDTIMEO: 968 so->so_snd.sb_timeo = val; 969 break; 970 case SO_RCVTIMEO: 971 so->so_rcv.sb_timeo = val; 972 break; 973 } 974 break; 975 } 976 977 default: 978 error = ENOPROTOOPT; 979 break; 980 } 981 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 982 (void) ((*so->so_proto->pr_ctloutput) 983 (PRCO_SETOPT, so, level, optname, &m0, p)); 984 m = NULL; /* freed by protocol */ 985 } 986 } 987bad: 988 if (m) 989 (void) m_free(m); 990 return (error); 991} 992 993int 994sogetopt(so, level, optname, mp, p) 995 register struct socket *so; 996 int level, optname; 997 struct mbuf **mp; 998 struct proc *p; 999{ 1000 register struct mbuf *m; 1001 1002 if (level != SOL_SOCKET) { 1003 if (so->so_proto && so->so_proto->pr_ctloutput) { 1004 return ((*so->so_proto->pr_ctloutput) 1005 (PRCO_GETOPT, so, level, optname, mp, p)); 1006 } else 1007 return (ENOPROTOOPT); 1008 } else { 1009 m = m_get(M_WAIT, MT_SOOPTS); 1010 m->m_len = sizeof (int); 1011 1012 switch (optname) { 1013 1014 case SO_LINGER: 1015 m->m_len = sizeof (struct linger); 1016 mtod(m, struct linger *)->l_onoff = 1017 so->so_options & SO_LINGER; 1018 mtod(m, struct linger *)->l_linger = so->so_linger; 1019 break; 1020 1021 case SO_USELOOPBACK: 1022 case SO_DONTROUTE: 1023 case SO_DEBUG: 1024 case SO_KEEPALIVE: 1025 case SO_REUSEADDR: 1026 case SO_REUSEPORT: 1027 case SO_BROADCAST: 1028 case SO_OOBINLINE: 1029 case SO_TIMESTAMP: 1030 *mtod(m, int *) = so->so_options & optname; 1031 break; 1032 1033 case SO_TYPE: 1034 *mtod(m, int *) = so->so_type; 1035 break; 1036 1037 case SO_ERROR: 1038 *mtod(m, int *) = so->so_error; 1039 so->so_error = 0; 1040 break; 1041 1042 case SO_SNDBUF: 1043 *mtod(m, int *) = so->so_snd.sb_hiwat; 1044 break; 1045 1046 case SO_RCVBUF: 1047 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1048 break; 1049 1050 case SO_SNDLOWAT: 1051 *mtod(m, int *) = so->so_snd.sb_lowat; 1052 break; 1053 1054 case SO_RCVLOWAT: 1055 *mtod(m, int *) = so->so_rcv.sb_lowat; 1056 break; 1057 1058 case SO_SNDTIMEO: 1059 case SO_RCVTIMEO: 1060 { 1061 int val = (optname == SO_SNDTIMEO ? 1062 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1063 1064 m->m_len = sizeof(struct timeval); 1065 mtod(m, struct timeval *)->tv_sec = val / hz; 1066 mtod(m, struct timeval *)->tv_usec = 1067 (val % hz) * tick; 1068 break; 1069 } 1070 1071 default: 1072 (void)m_free(m); 1073 return (ENOPROTOOPT); 1074 } 1075 *mp = m; 1076 return (0); 1077 } 1078} 1079 1080void 1081sohasoutofband(so) 1082 register struct socket *so; 1083{ 1084 struct proc *p; 1085 1086 if (so->so_pgid < 0) 1087 gsignal(-so->so_pgid, SIGURG); 1088 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1089 psignal(p, SIGURG); 1090 selwakeup(&so->so_rcv.sb_sel); 1091} 1092 1093int 1094sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1095{ 1096 int revents = 0; 1097 int s = splnet(); 1098 1099 if (events & (POLLIN | POLLRDNORM)) 1100 if (soreadable(so)) 1101 revents |= events & (POLLIN | POLLRDNORM); 1102 1103 if (events & (POLLOUT | POLLWRNORM)) 1104 if (sowriteable(so)) 1105 revents |= events & (POLLOUT | POLLWRNORM); 1106 1107 if (events & (POLLPRI | POLLRDBAND)) 1108 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1109 revents |= events & (POLLPRI | POLLRDBAND); 1110 1111 if (revents == 0) { 1112 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1113 selrecord(p, &so->so_rcv.sb_sel); 1114 so->so_rcv.sb_flags |= SB_SEL; 1115 } 1116 1117 if (events & (POLLOUT | POLLWRNORM)) { 1118 selrecord(p, &so->so_snd.sb_sel); 1119 so->so_snd.sb_flags |= SB_SEL; 1120 } 1121 } 1122 1123 splx(s); 1124 return (revents); 1125} 1126