/* uipc_socket.c revision 37444 */
11590Srgrimes/* 21590Srgrimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 3. All advertising materials mentioning features or use of this software 141590Srgrimes * must display the following acknowledgement: 151590Srgrimes * This product includes software developed by the University of 161590Srgrimes * California, Berkeley and its contributors. 171590Srgrimes * 4. Neither the name of the University nor the names of its contributors 181590Srgrimes * may be used to endorse or promote products derived from this software 191590Srgrimes * without specific prior written permission. 201590Srgrimes * 211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241590Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30110401Scharnier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311590Srgrimes * SUCH DAMAGE. 321590Srgrimes * 33110401Scharnier * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 3416509Sjraynard * $Id: uipc_socket.c,v 1.40 1998/05/15 20:11:30 wollman Exp $ 351590Srgrimes */ 3693523Sdwmalone 3793523Sdwmalone#include <sys/param.h> 3893523Sdwmalone#include <sys/systm.h> 391590Srgrimes#include <sys/proc.h> 401590Srgrimes#include <sys/fcntl.h> 411590Srgrimes#include <sys/malloc.h> 421590Srgrimes#include <sys/mbuf.h> 43138129Sdas#include <sys/domain.h> 441590Srgrimes#include <sys/kernel.h> 4516509Sjraynard#include <sys/poll.h> 46200462Sdelphij#include <sys/protosw.h> 47200462Sdelphij#include <sys/socket.h> 481590Srgrimes#include <sys/socketvar.h> 491590Srgrimes#include <sys/resourcevar.h> 5093523Sdwmalone#include <sys/signalvar.h> 5193523Sdwmalone#include <sys/sysctl.h> 5293523Sdwmalone#include <sys/uio.h> 5393523Sdwmalone#include <vm/vm_zone.h> 5493523Sdwmalone 5595649Smarkm#include <machine/limits.h> 561590Srgrimes 571590Srgrimesstruct vm_zone *socket_zone; 581590Srgrimesso_gen_t so_gencnt; /* generation count for sockets */ 591590Srgrimes 601590SrgrimesMALLOC_DEFINE(M_SONAME, "soname", "socket name"); 611590SrgrimesMALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 621590Srgrimes 631590Srgrimesstatic int somaxconn = SOMAXCONN; 64226269SdesSYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 65226269Sdes 0, ""); 66226269Sdes 
67233925Sjhb/* 68233925Sjhb * Socket operation routines. 69233925Sjhb * These routines are called by the routines in 701590Srgrimes * sys_socket.c or from a system process, and 711590Srgrimes * implement the semantics of socket operations by 721590Srgrimes * switching out to the protocol specific routines. 73226269Sdes */ 74226269Sdes 751590Srgrimes/* 761590Srgrimes * Get a socket structure from our zone, and initialize it. 771590Srgrimes * We don't implement `waitok' yet (see comments in uipc_domain.c). 781590Srgrimes * Note that it would probably be better to allocate socket 79176471Sdes * and PCB at the same time, but I'm not convinced that all 80176471Sdes * the protocols can be easily modified to do this. 81176471Sdes */ 8218399Sphkstruct socket * 8318399Sphksoalloc(waitok) 8418399Sphk int waitok; 851590Srgrimes{ 861590Srgrimes struct socket *so; 871590Srgrimes 88189707Sjhb so = zalloci(socket_zone); 89189707Sjhb if (so) { 90189707Sjhb /* XXX race condition for reentrant kernel */ 911590Srgrimes bzero(so, sizeof *so); 921590Srgrimes so->so_gencnt = ++so_gencnt; 931590Srgrimes so->so_zone = socket_zone; 941590Srgrimes } 951590Srgrimes return so; 961590Srgrimes} 971590Srgrimes 981590Srgrimesint 991590Srgrimessocreate(dom, aso, type, proto, p) 1001590Srgrimes int dom; 1011590Srgrimes struct socket **aso; 10293523Sdwmalone register int type; 10395649Smarkm int proto; 1041590Srgrimes struct proc *p; 1051590Srgrimes{ 1061590Srgrimes register struct protosw *prp; 1071590Srgrimes register struct socket *so; 1081590Srgrimes register int error; 1091590Srgrimes 11093523Sdwmalone if (proto) 11195649Smarkm prp = pffindproto(dom, proto, type); 1121590Srgrimes else 1131590Srgrimes prp = pffindtype(dom, type); 1141590Srgrimes if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 1151590Srgrimes return (EPROTONOSUPPORT); 1161590Srgrimes if (prp->pr_type != type) 1171590Srgrimes return (EPROTOTYPE); 11893523Sdwmalone so = soalloc(p != 0); 11995649Smarkm if (so == 0) 1201590Srgrimes 
return (ENOBUFS); 1211590Srgrimes 1221590Srgrimes TAILQ_INIT(&so->so_incomp); 1231590Srgrimes TAILQ_INIT(&so->so_comp); 1241590Srgrimes so->so_type = type; 1251590Srgrimes if (p != 0) 1261590Srgrimes so->so_uid = p->p_ucred->cr_uid; 1271590Srgrimes so->so_proto = prp; 1281590Srgrimes error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 1291590Srgrimes if (error) { 130 so->so_state |= SS_NOFDREF; 131 sofree(so); 132 return (error); 133 } 134 *aso = so; 135 return (0); 136} 137 138int 139sobind(so, nam, p) 140 struct socket *so; 141 struct sockaddr *nam; 142 struct proc *p; 143{ 144 int s = splnet(); 145 int error; 146 147 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 148 splx(s); 149 return (error); 150} 151 152void 153sodealloc(so) 154 struct socket *so; 155{ 156 so->so_gencnt = ++so_gencnt; 157 zfreei(so->so_zone, so); 158} 159 160int 161solisten(so, backlog, p) 162 register struct socket *so; 163 int backlog; 164 struct proc *p; 165{ 166 int s, error; 167 168 s = splnet(); 169 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 170 if (error) { 171 splx(s); 172 return (error); 173 } 174 if (so->so_comp.tqh_first == NULL) 175 so->so_options |= SO_ACCEPTCONN; 176 if (backlog < 0 || backlog > somaxconn) 177 backlog = somaxconn; 178 so->so_qlimit = backlog; 179 splx(s); 180 return (0); 181} 182 183void 184sofree(so) 185 register struct socket *so; 186{ 187 struct socket *head = so->so_head; 188 189 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 190 return; 191 if (head != NULL) { 192 if (so->so_state & SS_INCOMP) { 193 TAILQ_REMOVE(&head->so_incomp, so, so_list); 194 head->so_incqlen--; 195 } else if (so->so_state & SS_COMP) { 196 TAILQ_REMOVE(&head->so_comp, so, so_list); 197 } else { 198 panic("sofree: not queued"); 199 } 200 head->so_qlen--; 201 so->so_state &= ~(SS_INCOMP|SS_COMP); 202 so->so_head = NULL; 203 } 204 sbrelease(&so->so_snd); 205 sorflush(so); 206 sodealloc(so); 207} 208 209/* 210 * Close a socket on last file table reference 
removal. 211 * Initiate disconnect if connected. 212 * Free socket when disconnect complete. 213 */ 214int 215soclose(so) 216 register struct socket *so; 217{ 218 int s = splnet(); /* conservative */ 219 int error = 0; 220 221 if (so->so_options & SO_ACCEPTCONN) { 222 struct socket *sp, *sonext; 223 224 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { 225 sonext = sp->so_list.tqe_next; 226 (void) soabort(sp); 227 } 228 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { 229 sonext = sp->so_list.tqe_next; 230 (void) soabort(sp); 231 } 232 } 233 if (so->so_pcb == 0) 234 goto discard; 235 if (so->so_state & SS_ISCONNECTED) { 236 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 237 error = sodisconnect(so); 238 if (error) 239 goto drop; 240 } 241 if (so->so_options & SO_LINGER) { 242 if ((so->so_state & SS_ISDISCONNECTING) && 243 (so->so_state & SS_NBIO)) 244 goto drop; 245 while (so->so_state & SS_ISCONNECTED) { 246 error = tsleep((caddr_t)&so->so_timeo, 247 PSOCK | PCATCH, "soclos", so->so_linger); 248 if (error) 249 break; 250 } 251 } 252 } 253drop: 254 if (so->so_pcb) { 255 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 256 if (error == 0) 257 error = error2; 258 } 259discard: 260 if (so->so_state & SS_NOFDREF) 261 panic("soclose: NOFDREF"); 262 so->so_state |= SS_NOFDREF; 263 sofree(so); 264 splx(s); 265 return (error); 266} 267 268/* 269 * Must be called at splnet... 
270 */ 271int 272soabort(so) 273 struct socket *so; 274{ 275 276 return (*so->so_proto->pr_usrreqs->pru_abort)(so); 277} 278 279int 280soaccept(so, nam) 281 register struct socket *so; 282 struct sockaddr **nam; 283{ 284 int s = splnet(); 285 int error; 286 287 if ((so->so_state & SS_NOFDREF) == 0) 288 panic("soaccept: !NOFDREF"); 289 so->so_state &= ~SS_NOFDREF; 290 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 291 splx(s); 292 return (error); 293} 294 295int 296soconnect(so, nam, p) 297 register struct socket *so; 298 struct sockaddr *nam; 299 struct proc *p; 300{ 301 int s; 302 int error; 303 304 if (so->so_options & SO_ACCEPTCONN) 305 return (EOPNOTSUPP); 306 s = splnet(); 307 /* 308 * If protocol is connection-based, can only connect once. 309 * Otherwise, if connected, try to disconnect first. 310 * This allows user to disconnect by connecting to, e.g., 311 * a null address. 312 */ 313 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 314 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 315 (error = sodisconnect(so)))) 316 error = EISCONN; 317 else 318 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 319 splx(s); 320 return (error); 321} 322 323int 324soconnect2(so1, so2) 325 register struct socket *so1; 326 struct socket *so2; 327{ 328 int s = splnet(); 329 int error; 330 331 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 332 splx(s); 333 return (error); 334} 335 336int 337sodisconnect(so) 338 register struct socket *so; 339{ 340 int s = splnet(); 341 int error; 342 343 if ((so->so_state & SS_ISCONNECTED) == 0) { 344 error = ENOTCONN; 345 goto bad; 346 } 347 if (so->so_state & SS_ISDISCONNECTING) { 348 error = EALREADY; 349 goto bad; 350 } 351 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 352bad: 353 splx(s); 354 return (error); 355} 356 357#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 358/* 359 * Send on a socket. 
360 * If send must go all at once and message is larger than 361 * send buffering, then hard error. 362 * Lock against other senders. 363 * If must go all at once and not enough room now, then 364 * inform user that this would block and do nothing. 365 * Otherwise, if nonblocking, send as much as possible. 366 * The data to be sent is described by "uio" if nonzero, 367 * otherwise by the mbuf chain "top" (which must be null 368 * if uio is not). Data provided in mbuf chain must be small 369 * enough to send all at once. 370 * 371 * Returns nonzero on error, timeout or signal; callers 372 * must check for short counts if EINTR/ERESTART are returned. 373 * Data and control buffers are freed on return. 374 */ 375int 376sosend(so, addr, uio, top, control, flags, p) 377 register struct socket *so; 378 struct sockaddr *addr; 379 struct uio *uio; 380 struct mbuf *top; 381 struct mbuf *control; 382 int flags; 383 struct proc *p; 384{ 385 struct mbuf **mp; 386 register struct mbuf *m; 387 register long space, len, resid; 388 int clen = 0, error, s, dontroute, mlen; 389 int atomic = sosendallatonce(so) || top; 390 391 if (uio) 392 resid = uio->uio_resid; 393 else 394 resid = top->m_pkthdr.len; 395 /* 396 * In theory resid should be unsigned. 397 * However, space must be signed, as it might be less than 0 398 * if we over-committed, and we must use a signed comparison 399 * of space and resid. On the other hand, a negative resid 400 * causes us to loop sending 0-length segments to the protocol. 401 * 402 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 403 * type sockets since that's an error. 
404 */ 405 if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { 406 error = EINVAL; 407 goto out; 408 } 409 410 dontroute = 411 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 412 (so->so_proto->pr_flags & PR_ATOMIC); 413 if (p) 414 p->p_stats->p_ru.ru_msgsnd++; 415 if (control) 416 clen = control->m_len; 417#define snderr(errno) { error = errno; splx(s); goto release; } 418 419restart: 420 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 421 if (error) 422 goto out; 423 do { 424 s = splnet(); 425 if (so->so_state & SS_CANTSENDMORE) 426 snderr(EPIPE); 427 if (so->so_error) { 428 error = so->so_error; 429 so->so_error = 0; 430 splx(s); 431 goto release; 432 } 433 if ((so->so_state & SS_ISCONNECTED) == 0) { 434 /* 435 * `sendto' and `sendmsg' is allowed on a connection- 436 * based socket if it supports implied connect. 437 * Return ENOTCONN if not connected and no address is 438 * supplied. 439 */ 440 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 441 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 442 if ((so->so_state & SS_ISCONFIRMING) == 0 && 443 !(resid == 0 && clen != 0)) 444 snderr(ENOTCONN); 445 } else if (addr == 0) 446 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 447 ENOTCONN : EDESTADDRREQ); 448 } 449 space = sbspace(&so->so_snd); 450 if (flags & MSG_OOB) 451 space += 1024; 452 if ((atomic && resid > so->so_snd.sb_hiwat) || 453 clen > so->so_snd.sb_hiwat) 454 snderr(EMSGSIZE); 455 if (space < resid + clen && uio && 456 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 457 if (so->so_state & SS_NBIO) 458 snderr(EWOULDBLOCK); 459 sbunlock(&so->so_snd); 460 error = sbwait(&so->so_snd); 461 splx(s); 462 if (error) 463 goto out; 464 goto restart; 465 } 466 splx(s); 467 mp = ⊤ 468 space -= clen; 469 do { 470 if (uio == NULL) { 471 /* 472 * Data is prepackaged in "top". 
473 */ 474 resid = 0; 475 if (flags & MSG_EOR) 476 top->m_flags |= M_EOR; 477 } else do { 478 if (top == 0) { 479 MGETHDR(m, M_WAIT, MT_DATA); 480 mlen = MHLEN; 481 m->m_pkthdr.len = 0; 482 m->m_pkthdr.rcvif = (struct ifnet *)0; 483 } else { 484 MGET(m, M_WAIT, MT_DATA); 485 mlen = MLEN; 486 } 487 if (resid >= MINCLSIZE) { 488 MCLGET(m, M_WAIT); 489 if ((m->m_flags & M_EXT) == 0) 490 goto nopages; 491 mlen = MCLBYTES; 492 len = min(min(mlen, resid), space); 493 } else { 494 atomic = 1; 495nopages: 496 len = min(min(mlen, resid), space); 497 /* 498 * For datagram protocols, leave room 499 * for protocol headers in first mbuf. 500 */ 501 if (atomic && top == 0 && len < mlen) 502 MH_ALIGN(m, len); 503 } 504 space -= len; 505 error = uiomove(mtod(m, caddr_t), (int)len, uio); 506 resid = uio->uio_resid; 507 m->m_len = len; 508 *mp = m; 509 top->m_pkthdr.len += len; 510 if (error) 511 goto release; 512 mp = &m->m_next; 513 if (resid <= 0) { 514 if (flags & MSG_EOR) 515 top->m_flags |= M_EOR; 516 break; 517 } 518 } while (space > 0 && atomic); 519 if (dontroute) 520 so->so_options |= SO_DONTROUTE; 521 s = splnet(); /* XXX */ 522 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 523 (flags & MSG_OOB) ? PRUS_OOB : 524 /* 525 * If the user set MSG_EOF, the protocol 526 * understands this flag and nothing left to 527 * send then use PRU_SEND_EOF instead of PRU_SEND. 528 */ 529 ((flags & MSG_EOF) && 530 (so->so_proto->pr_flags & PR_IMPLOPCL) && 531 (resid <= 0)) ? 532 PRUS_EOF : 0, 533 top, addr, control, p); 534 splx(s); 535 if (dontroute) 536 so->so_options &= ~SO_DONTROUTE; 537 clen = 0; 538 control = 0; 539 top = 0; 540 mp = ⊤ 541 if (error) 542 goto release; 543 } while (resid && space > 0); 544 } while (resid); 545 546release: 547 sbunlock(&so->so_snd); 548out: 549 if (top) 550 m_freem(top); 551 if (control) 552 m_freem(control); 553 return (error); 554} 555 556/* 557 * Implement receive operations on a socket. 
558 * We depend on the way that records are added to the sockbuf 559 * by sbappend*. In particular, each record (mbufs linked through m_next) 560 * must begin with an address if the protocol so specifies, 561 * followed by an optional mbuf or mbufs containing ancillary data, 562 * and then zero or more mbufs of data. 563 * In order to avoid blocking network interrupts for the entire time here, 564 * we splx() while doing the actual copy to user space. 565 * Although the sockbuf is locked, new data may still be appended, 566 * and thus we must maintain consistency of the sockbuf during that time. 567 * 568 * The caller may receive the data as a single mbuf chain by supplying 569 * an mbuf **mp0 for use in returning the chain. The uio is then used 570 * only for the count in uio_resid. 571 */ 572int 573soreceive(so, psa, uio, mp0, controlp, flagsp) 574 register struct socket *so; 575 struct sockaddr **psa; 576 struct uio *uio; 577 struct mbuf **mp0; 578 struct mbuf **controlp; 579 int *flagsp; 580{ 581 register struct mbuf *m, **mp; 582 register int flags, len, error, s, offset; 583 struct protosw *pr = so->so_proto; 584 struct mbuf *nextrecord; 585 int moff, type = 0; 586 int orig_resid = uio->uio_resid; 587 588 mp = mp0; 589 if (psa) 590 *psa = 0; 591 if (controlp) 592 *controlp = 0; 593 if (flagsp) 594 flags = *flagsp &~ MSG_EOR; 595 else 596 flags = 0; 597 if (flags & MSG_OOB) { 598 m = m_get(M_WAIT, MT_DATA); 599 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 600 if (error) 601 goto bad; 602 do { 603 error = uiomove(mtod(m, caddr_t), 604 (int) min(uio->uio_resid, m->m_len), uio); 605 m = m_free(m); 606 } while (uio->uio_resid && error == 0 && m); 607bad: 608 if (m) 609 m_freem(m); 610 return (error); 611 } 612 if (mp) 613 *mp = (struct mbuf *)0; 614 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 615 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 616 617restart: 618 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 619 if (error) 620 return (error); 
621 s = splnet(); 622 623 m = so->so_rcv.sb_mb; 624 /* 625 * If we have less data than requested, block awaiting more 626 * (subject to any timeout) if: 627 * 1. the current count is less than the low water mark, or 628 * 2. MSG_WAITALL is set, and it is possible to do the entire 629 * receive operation at once if we block (resid <= hiwat). 630 * 3. MSG_DONTWAIT is not set 631 * If MSG_WAITALL is set but resid is larger than the receive buffer, 632 * we have to do the receive in sections, and thus risk returning 633 * a short count if a timeout or signal occurs after we start. 634 */ 635 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 636 so->so_rcv.sb_cc < uio->uio_resid) && 637 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 638 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 639 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 640#ifdef DIAGNOSTIC 641 if (m == 0 && so->so_rcv.sb_cc) 642 panic("receive 1"); 643#endif 644 if (so->so_error) { 645 if (m) 646 goto dontblock; 647 error = so->so_error; 648 if ((flags & MSG_PEEK) == 0) 649 so->so_error = 0; 650 goto release; 651 } 652 if (so->so_state & SS_CANTRCVMORE) { 653 if (m) 654 goto dontblock; 655 else 656 goto release; 657 } 658 for (; m; m = m->m_next) 659 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 660 m = so->so_rcv.sb_mb; 661 goto dontblock; 662 } 663 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 664 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 665 error = ENOTCONN; 666 goto release; 667 } 668 if (uio->uio_resid == 0) 669 goto release; 670 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 671 error = EWOULDBLOCK; 672 goto release; 673 } 674 sbunlock(&so->so_rcv); 675 error = sbwait(&so->so_rcv); 676 splx(s); 677 if (error) 678 return (error); 679 goto restart; 680 } 681dontblock: 682 if (uio->uio_procp) 683 uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 684 nextrecord = m->m_nextpkt; 685 if (pr->pr_flags & PR_ADDR) { 686#ifdef DIAGNOSTIC 687 if 
(m->m_type != MT_SONAME) 688 panic("receive 1a"); 689#endif 690 orig_resid = 0; 691 if (psa) 692 *psa = dup_sockaddr(mtod(m, struct sockaddr *), 693 mp0 == 0); 694 if (flags & MSG_PEEK) { 695 m = m->m_next; 696 } else { 697 sbfree(&so->so_rcv, m); 698 MFREE(m, so->so_rcv.sb_mb); 699 m = so->so_rcv.sb_mb; 700 } 701 } 702 while (m && m->m_type == MT_CONTROL && error == 0) { 703 if (flags & MSG_PEEK) { 704 if (controlp) 705 *controlp = m_copy(m, 0, m->m_len); 706 m = m->m_next; 707 } else { 708 sbfree(&so->so_rcv, m); 709 if (controlp) { 710 if (pr->pr_domain->dom_externalize && 711 mtod(m, struct cmsghdr *)->cmsg_type == 712 SCM_RIGHTS) 713 error = (*pr->pr_domain->dom_externalize)(m); 714 *controlp = m; 715 so->so_rcv.sb_mb = m->m_next; 716 m->m_next = 0; 717 m = so->so_rcv.sb_mb; 718 } else { 719 MFREE(m, so->so_rcv.sb_mb); 720 m = so->so_rcv.sb_mb; 721 } 722 } 723 if (controlp) { 724 orig_resid = 0; 725 controlp = &(*controlp)->m_next; 726 } 727 } 728 if (m) { 729 if ((flags & MSG_PEEK) == 0) 730 m->m_nextpkt = nextrecord; 731 type = m->m_type; 732 if (type == MT_OOBDATA) 733 flags |= MSG_OOB; 734 } 735 moff = 0; 736 offset = 0; 737 while (m && uio->uio_resid > 0 && error == 0) { 738 if (m->m_type == MT_OOBDATA) { 739 if (type != MT_OOBDATA) 740 break; 741 } else if (type == MT_OOBDATA) 742 break; 743#ifdef DIAGNOSTIC 744 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 745 panic("receive 3"); 746#endif 747 so->so_state &= ~SS_RCVATMARK; 748 len = uio->uio_resid; 749 if (so->so_oobmark && len > so->so_oobmark - offset) 750 len = so->so_oobmark - offset; 751 if (len > m->m_len - moff) 752 len = m->m_len - moff; 753 /* 754 * If mp is set, just pass back the mbufs. 755 * Otherwise copy them out via the uio, then free. 756 * Sockbuf must be consistent here (points to current mbuf, 757 * it points to next record) when we drop priority; 758 * we must note any additions to the sockbuf when we 759 * block interrupts again. 
760 */ 761 if (mp == 0) { 762 splx(s); 763 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 764 s = splnet(); 765 if (error) 766 goto release; 767 } else 768 uio->uio_resid -= len; 769 if (len == m->m_len - moff) { 770 if (m->m_flags & M_EOR) 771 flags |= MSG_EOR; 772 if (flags & MSG_PEEK) { 773 m = m->m_next; 774 moff = 0; 775 } else { 776 nextrecord = m->m_nextpkt; 777 sbfree(&so->so_rcv, m); 778 if (mp) { 779 *mp = m; 780 mp = &m->m_next; 781 so->so_rcv.sb_mb = m = m->m_next; 782 *mp = (struct mbuf *)0; 783 } else { 784 MFREE(m, so->so_rcv.sb_mb); 785 m = so->so_rcv.sb_mb; 786 } 787 if (m) 788 m->m_nextpkt = nextrecord; 789 } 790 } else { 791 if (flags & MSG_PEEK) 792 moff += len; 793 else { 794 if (mp) 795 *mp = m_copym(m, 0, len, M_WAIT); 796 m->m_data += len; 797 m->m_len -= len; 798 so->so_rcv.sb_cc -= len; 799 } 800 } 801 if (so->so_oobmark) { 802 if ((flags & MSG_PEEK) == 0) { 803 so->so_oobmark -= len; 804 if (so->so_oobmark == 0) { 805 so->so_state |= SS_RCVATMARK; 806 break; 807 } 808 } else { 809 offset += len; 810 if (offset == so->so_oobmark) 811 break; 812 } 813 } 814 if (flags & MSG_EOR) 815 break; 816 /* 817 * If the MSG_WAITALL flag is set (for non-atomic socket), 818 * we must not quit until "uio->uio_resid == 0" or an error 819 * termination. If a signal/timeout occurs, return 820 * with a short count but without error. 821 * Keep sockbuf locked against other readers. 
822 */ 823 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 824 !sosendallatonce(so) && !nextrecord) { 825 if (so->so_error || so->so_state & SS_CANTRCVMORE) 826 break; 827 error = sbwait(&so->so_rcv); 828 if (error) { 829 sbunlock(&so->so_rcv); 830 splx(s); 831 return (0); 832 } 833 m = so->so_rcv.sb_mb; 834 if (m) 835 nextrecord = m->m_nextpkt; 836 } 837 } 838 839 if (m && pr->pr_flags & PR_ATOMIC) { 840 flags |= MSG_TRUNC; 841 if ((flags & MSG_PEEK) == 0) 842 (void) sbdroprecord(&so->so_rcv); 843 } 844 if ((flags & MSG_PEEK) == 0) { 845 if (m == 0) 846 so->so_rcv.sb_mb = nextrecord; 847 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 848 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 849 } 850 if (orig_resid == uio->uio_resid && orig_resid && 851 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 852 sbunlock(&so->so_rcv); 853 splx(s); 854 goto restart; 855 } 856 857 if (flagsp) 858 *flagsp |= flags; 859release: 860 sbunlock(&so->so_rcv); 861 splx(s); 862 return (error); 863} 864 865int 866soshutdown(so, how) 867 register struct socket *so; 868 register int how; 869{ 870 register struct protosw *pr = so->so_proto; 871 872 how++; 873 if (how & FREAD) 874 sorflush(so); 875 if (how & FWRITE) 876 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 877 return (0); 878} 879 880void 881sorflush(so) 882 register struct socket *so; 883{ 884 register struct sockbuf *sb = &so->so_rcv; 885 register struct protosw *pr = so->so_proto; 886 register int s; 887 struct sockbuf asb; 888 889 sb->sb_flags |= SB_NOINTR; 890 (void) sblock(sb, M_WAITOK); 891 s = splimp(); 892 socantrcvmore(so); 893 sbunlock(sb); 894 asb = *sb; 895 bzero((caddr_t)sb, sizeof (*sb)); 896 splx(s); 897 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 898 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 899 sbrelease(&asb); 900} 901 902int 903sosetopt(so, level, optname, m0, p) 904 register struct socket *so; 905 int level, optname; 906 struct mbuf *m0; 907 struct proc *p; 908{ 909 int error 
= 0; 910 register struct mbuf *m = m0; 911 912 if (level != SOL_SOCKET) { 913 if (so->so_proto && so->so_proto->pr_ctloutput) 914 return ((*so->so_proto->pr_ctloutput) 915 (PRCO_SETOPT, so, level, optname, &m0, p)); 916 error = ENOPROTOOPT; 917 } else { 918 switch (optname) { 919 920 case SO_LINGER: 921 if (m == NULL || m->m_len != sizeof (struct linger)) { 922 error = EINVAL; 923 goto bad; 924 } 925 so->so_linger = mtod(m, struct linger *)->l_linger; 926 /* fall thru... */ 927 928 case SO_DEBUG: 929 case SO_KEEPALIVE: 930 case SO_DONTROUTE: 931 case SO_USELOOPBACK: 932 case SO_BROADCAST: 933 case SO_REUSEADDR: 934 case SO_REUSEPORT: 935 case SO_OOBINLINE: 936 case SO_TIMESTAMP: 937 if (m == NULL || m->m_len < sizeof (int)) { 938 error = EINVAL; 939 goto bad; 940 } 941 if (*mtod(m, int *)) 942 so->so_options |= optname; 943 else 944 so->so_options &= ~optname; 945 break; 946 947 case SO_SNDBUF: 948 case SO_RCVBUF: 949 case SO_SNDLOWAT: 950 case SO_RCVLOWAT: 951 { 952 int optval; 953 954 if (m == NULL || m->m_len < sizeof (int)) { 955 error = EINVAL; 956 goto bad; 957 } 958 959 /* 960 * Values < 1 make no sense for any of these 961 * options, so disallow them. 962 */ 963 optval = *mtod(m, int *); 964 if (optval < 1) { 965 error = EINVAL; 966 goto bad; 967 } 968 969 switch (optname) { 970 971 case SO_SNDBUF: 972 case SO_RCVBUF: 973 if (sbreserve(optname == SO_SNDBUF ? 974 &so->so_snd : &so->so_rcv, 975 (u_long) optval) == 0) { 976 error = ENOBUFS; 977 goto bad; 978 } 979 break; 980 981 /* 982 * Make sure the low-water is never greater than 983 * the high-water. 984 */ 985 case SO_SNDLOWAT: 986 so->so_snd.sb_lowat = 987 (optval > so->so_snd.sb_hiwat) ? 988 so->so_snd.sb_hiwat : optval; 989 break; 990 case SO_RCVLOWAT: 991 so->so_rcv.sb_lowat = 992 (optval > so->so_rcv.sb_hiwat) ? 
993 so->so_rcv.sb_hiwat : optval; 994 break; 995 } 996 break; 997 } 998 999 case SO_SNDTIMEO: 1000 case SO_RCVTIMEO: 1001 { 1002 struct timeval *tv; 1003 short val; 1004 1005 if (m == NULL || m->m_len < sizeof (*tv)) { 1006 error = EINVAL; 1007 goto bad; 1008 } 1009 tv = mtod(m, struct timeval *); 1010 if (tv->tv_sec > SHRT_MAX / hz - hz) { 1011 error = EDOM; 1012 goto bad; 1013 } 1014 val = tv->tv_sec * hz + tv->tv_usec / tick; 1015 1016 switch (optname) { 1017 1018 case SO_SNDTIMEO: 1019 so->so_snd.sb_timeo = val; 1020 break; 1021 case SO_RCVTIMEO: 1022 so->so_rcv.sb_timeo = val; 1023 break; 1024 } 1025 break; 1026 } 1027 1028 default: 1029 error = ENOPROTOOPT; 1030 break; 1031 } 1032 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1033 (void) ((*so->so_proto->pr_ctloutput) 1034 (PRCO_SETOPT, so, level, optname, &m0, p)); 1035 m = NULL; /* freed by protocol */ 1036 } 1037 } 1038bad: 1039 if (m) 1040 (void) m_free(m); 1041 return (error); 1042} 1043 1044int 1045sogetopt(so, level, optname, mp, p) 1046 register struct socket *so; 1047 int level, optname; 1048 struct mbuf **mp; 1049 struct proc *p; 1050{ 1051 register struct mbuf *m; 1052 1053 if (level != SOL_SOCKET) { 1054 if (so->so_proto && so->so_proto->pr_ctloutput) { 1055 return ((*so->so_proto->pr_ctloutput) 1056 (PRCO_GETOPT, so, level, optname, mp, p)); 1057 } else 1058 return (ENOPROTOOPT); 1059 } else { 1060 m = m_get(M_WAIT, MT_SOOPTS); 1061 m->m_len = sizeof (int); 1062 1063 switch (optname) { 1064 1065 case SO_LINGER: 1066 m->m_len = sizeof (struct linger); 1067 mtod(m, struct linger *)->l_onoff = 1068 so->so_options & SO_LINGER; 1069 mtod(m, struct linger *)->l_linger = so->so_linger; 1070 break; 1071 1072 case SO_USELOOPBACK: 1073 case SO_DONTROUTE: 1074 case SO_DEBUG: 1075 case SO_KEEPALIVE: 1076 case SO_REUSEADDR: 1077 case SO_REUSEPORT: 1078 case SO_BROADCAST: 1079 case SO_OOBINLINE: 1080 case SO_TIMESTAMP: 1081 *mtod(m, int *) = so->so_options & optname; 1082 break; 1083 1084 
case SO_TYPE: 1085 *mtod(m, int *) = so->so_type; 1086 break; 1087 1088 case SO_ERROR: 1089 *mtod(m, int *) = so->so_error; 1090 so->so_error = 0; 1091 break; 1092 1093 case SO_SNDBUF: 1094 *mtod(m, int *) = so->so_snd.sb_hiwat; 1095 break; 1096 1097 case SO_RCVBUF: 1098 *mtod(m, int *) = so->so_rcv.sb_hiwat; 1099 break; 1100 1101 case SO_SNDLOWAT: 1102 *mtod(m, int *) = so->so_snd.sb_lowat; 1103 break; 1104 1105 case SO_RCVLOWAT: 1106 *mtod(m, int *) = so->so_rcv.sb_lowat; 1107 break; 1108 1109 case SO_SNDTIMEO: 1110 case SO_RCVTIMEO: 1111 { 1112 int val = (optname == SO_SNDTIMEO ? 1113 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1114 1115 m->m_len = sizeof(struct timeval); 1116 mtod(m, struct timeval *)->tv_sec = val / hz; 1117 mtod(m, struct timeval *)->tv_usec = 1118 (val % hz) * tick; 1119 break; 1120 } 1121 1122 default: 1123 (void)m_free(m); 1124 return (ENOPROTOOPT); 1125 } 1126 *mp = m; 1127 return (0); 1128 } 1129} 1130 1131void 1132sohasoutofband(so) 1133 register struct socket *so; 1134{ 1135 struct proc *p; 1136 1137 if (so->so_pgid < 0) 1138 gsignal(-so->so_pgid, SIGURG); 1139 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 1140 psignal(p, SIGURG); 1141 selwakeup(&so->so_rcv.sb_sel); 1142} 1143 1144int 1145sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1146{ 1147 int revents = 0; 1148 int s = splnet(); 1149 1150 if (events & (POLLIN | POLLRDNORM)) 1151 if (soreadable(so)) 1152 revents |= events & (POLLIN | POLLRDNORM); 1153 1154 if (events & (POLLOUT | POLLWRNORM)) 1155 if (sowriteable(so)) 1156 revents |= events & (POLLOUT | POLLWRNORM); 1157 1158 if (events & (POLLPRI | POLLRDBAND)) 1159 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1160 revents |= events & (POLLPRI | POLLRDBAND); 1161 1162 if (revents == 0) { 1163 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1164 selrecord(p, &so->so_rcv.sb_sel); 1165 so->so_rcv.sb_flags |= SB_SEL; 1166 } 1167 1168 if (events & (POLLOUT | POLLWRNORM)) { 
1169 selrecord(p, &so->so_snd.sb_sel); 1170 so->so_snd.sb_flags |= SB_SEL; 1171 } 1172 } 1173 1174 splx(s); 1175 return (revents); 1176} 1177