uipc_socket.c revision 53541
1109998Smarkm/* 2296341Sdelphij * Copyright (c) 1982, 1986, 1988, 1990, 1993 3296341Sdelphij * The Regents of the University of California. All rights reserved. 4296341Sdelphij * 5109998Smarkm * Redistribution and use in source and binary forms, with or without 6109998Smarkm * modification, are permitted provided that the following conditions 7109998Smarkm * are met: 8109998Smarkm * 1. Redistributions of source code must retain the above copyright 9109998Smarkm * notice, this list of conditions and the following disclaimer. 10109998Smarkm * 2. Redistributions in binary form must reproduce the above copyright 11109998Smarkm * notice, this list of conditions and the following disclaimer in the 12109998Smarkm * documentation and/or other materials provided with the distribution. 13109998Smarkm * 3. All advertising materials mentioning features or use of this software 14296341Sdelphij * must display the following acknowledgement: 15109998Smarkm * This product includes software developed by the University of 16109998Smarkm * California, Berkeley and its contributors. 17109998Smarkm * 4. Neither the name of the University nor the names of its contributors 18109998Smarkm * may be used to endorse or promote products derived from this software 19109998Smarkm * without specific prior written permission. 20109998Smarkm * 21109998Smarkm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22109998Smarkm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23109998Smarkm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24109998Smarkm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25109998Smarkm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26109998Smarkm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27109998Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28109998Smarkm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29109998Smarkm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30109998Smarkm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31109998Smarkm * SUCH DAMAGE. 32109998Smarkm * 33109998Smarkm * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 34109998Smarkm * $FreeBSD: head/sys/kern/uipc_socket.c 53541 1999-11-22 02:45:11Z shin $ 35109998Smarkm */ 36109998Smarkm 37109998Smarkm#include <sys/param.h> 38109998Smarkm#include <sys/systm.h> 39109998Smarkm#include <sys/fcntl.h> 40109998Smarkm#include <sys/malloc.h> 41109998Smarkm#include <sys/mbuf.h> 42109998Smarkm#include <sys/domain.h> 43109998Smarkm#include <sys/kernel.h> 44109998Smarkm#include <sys/malloc.h> 45109998Smarkm#include <sys/poll.h> 46109998Smarkm#include <sys/proc.h> 47109998Smarkm#include <sys/protosw.h> 48109998Smarkm#include <sys/socket.h> 49109998Smarkm#include <sys/socketvar.h> 50109998Smarkm#include <sys/resourcevar.h> 51109998Smarkm#include <sys/signalvar.h> 52109998Smarkm#include <sys/sysctl.h> 53109998Smarkm#include <sys/uio.h> 54109998Smarkm#include <vm/vm_zone.h> 55109998Smarkm 56109998Smarkm#include <machine/limits.h> 57109998Smarkm 58109998Smarkmstruct vm_zone *socket_zone; 59111147Snectarso_gen_t so_gencnt; /* generation count for sockets */ 60238405Sjkim 61296341SdelphijMALLOC_DEFINE(M_SONAME, "soname", "socket name"); 62296341SdelphijMALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 63296341Sdelphij 64296341SdelphijSYSCTL_DECL(_kern_ipc); 65238405Sjkim 66296341Sdelphijstatic int somaxconn = SOMAXCONN; 67238405SjkimSYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, 68296341Sdelphij &somaxconn, 0, "Maximum pending socket connection queue size"); 69296341Sdelphij 70296341Sdelphij/* 71296341Sdelphij * Socket operation routines. 72296341Sdelphij * These routines are called by the routines in 73296341Sdelphij * sys_socket.c or from a system process, and 74296341Sdelphij * implement the semantics of socket operations by 75296341Sdelphij * switching out to the protocol specific routines. 76296341Sdelphij */ 77296341Sdelphij 78296341Sdelphij/* 79296341Sdelphij * Get a socket structure from our zone, and initialize it. 80296341Sdelphij * We don't implement `waitok' yet (see comments in uipc_domain.c). 81109998Smarkm * Note that it would probably be better to allocate socket 82296341Sdelphij * and PCB at the same time, but I'm not convinced that all 83238405Sjkim * the protocols can be easily modified to do this. 84296341Sdelphij */ 85296341Sdelphijstruct socket * 86238405Sjkimsoalloc(waitok) 87296341Sdelphij int waitok; 88238405Sjkim{ 89296341Sdelphij struct socket *so; 90238405Sjkim 91296341Sdelphij so = zalloci(socket_zone); 92238405Sjkim if (so) { 93296341Sdelphij /* XXX race condition for reentrant kernel */ 94238405Sjkim bzero(so, sizeof *so); 95296341Sdelphij so->so_gencnt = ++so_gencnt; 96296341Sdelphij so->so_zone = socket_zone; 97109998Smarkm } 98296341Sdelphij return so; 99109998Smarkm} 100296341Sdelphij 101296341Sdelphijint 102296341Sdelphijsocreate(dom, aso, type, proto, p) 103296341Sdelphij int dom; 104296341Sdelphij struct socket **aso; 105296341Sdelphij register int type; 106109998Smarkm int proto; 107296341Sdelphij struct proc *p; 108296341Sdelphij{ 109296341Sdelphij register struct protosw *prp; 110109998Smarkm register struct socket *so; 111296341Sdelphij register int error; 112296341Sdelphij 113296341Sdelphij if (proto) 114296341Sdelphij prp = pffindproto(dom, proto, type); 115109998Smarkm else 116127128Snectar prp = pffindtype(dom, type); 117109998Smarkm if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) 118296341Sdelphij return (EPROTONOSUPPORT); 119296341Sdelphij if (prp->pr_type != type) 120109998Smarkm return (EPROTOTYPE); 121194206Ssimon so = soalloc(p != 0); 122296341Sdelphij if (so == 0) 123296341Sdelphij return (ENOBUFS); 124109998Smarkm 125296341Sdelphij TAILQ_INIT(&so->so_incomp); 126296341Sdelphij TAILQ_INIT(&so->so_comp); 127109998Smarkm so->so_type = type; 128109998Smarkm so->so_cred = p->p_ucred; 129109998Smarkm crhold(so->so_cred); 130109998Smarkm so->so_proto = prp; 131296341Sdelphij error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); 132296341Sdelphij if (error) { 133296341Sdelphij so->so_state |= SS_NOFDREF; 134296341Sdelphij sofree(so); 135296341Sdelphij return (error); 136296341Sdelphij } 137296341Sdelphij *aso = so; 138296341Sdelphij return (0); 139296341Sdelphij} 140296341Sdelphij 141296341Sdelphijint 142296341Sdelphijsobind(so, nam, p) 143296341Sdelphij struct socket *so; 144296341Sdelphij struct sockaddr *nam; 145296341Sdelphij struct proc *p; 146296341Sdelphij{ 147296341Sdelphij int s = splnet(); 148296341Sdelphij int error; 149296341Sdelphij 150296341Sdelphij error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); 151296341Sdelphij splx(s); 152296341Sdelphij return (error); 153296341Sdelphij} 154296341Sdelphij 155296341Sdelphijvoid 156296341Sdelphijsodealloc(so) 157296341Sdelphij struct socket *so; 158296341Sdelphij{ 159296341Sdelphij 160296341Sdelphij so->so_gencnt = ++so_gencnt; 161296341Sdelphij if (so->so_rcv.sb_hiwat) 162296341Sdelphij (void)chgsbsize(so->so_cred->cr_uid, 163296341Sdelphij -(rlim_t)so->so_rcv.sb_hiwat); 164296341Sdelphij if (so->so_snd.sb_hiwat) 165296341Sdelphij (void)chgsbsize(so->so_cred->cr_uid, 166296341Sdelphij -(rlim_t)so->so_snd.sb_hiwat); 167109998Smarkm crfree(so->so_cred); 168296341Sdelphij zfreei(so->so_zone, so); 169296341Sdelphij} 170296341Sdelphij 171296341Sdelphijint 172296341Sdelphijsolisten(so, backlog, p) 173296341Sdelphij register struct socket *so; 174109998Smarkm int backlog; 175296341Sdelphij struct proc *p; 176296341Sdelphij{ 177109998Smarkm int s, error; 178296341Sdelphij 179296341Sdelphij s = splnet(); 180296341Sdelphij error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); 181296341Sdelphij if (error) { 182296341Sdelphij splx(s); 183296341Sdelphij return (error); 184296341Sdelphij } 185296341Sdelphij if (TAILQ_EMPTY(&so->so_comp)) 186296341Sdelphij so->so_options |= SO_ACCEPTCONN; 187296341Sdelphij if (backlog < 0 || backlog > somaxconn) 188296341Sdelphij backlog = somaxconn; 189296341Sdelphij so->so_qlimit = backlog; 190296341Sdelphij splx(s); 191296341Sdelphij return (0); 192296341Sdelphij} 193296341Sdelphij 194296341Sdelphijvoid 195296341Sdelphijsofree(so) 196296341Sdelphij register struct socket *so; 197296341Sdelphij{ 198296341Sdelphij struct socket *head = so->so_head; 199296341Sdelphij 200296341Sdelphij if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) 201296341Sdelphij return; 202296341Sdelphij if (head != NULL) { 203296341Sdelphij if (so->so_state & SS_INCOMP) { 204296341Sdelphij TAILQ_REMOVE(&head->so_incomp, so, so_list); 205296341Sdelphij head->so_incqlen--; 206296341Sdelphij } else if (so->so_state & SS_COMP) { 207296341Sdelphij /* 208296341Sdelphij * We must not decommission a socket that's 209296341Sdelphij * on the accept(2) queue. If we do, then 210296341Sdelphij * accept(2) may hang after select(2) indicated 211296341Sdelphij * that the listening socket was ready. 212296341Sdelphij */ 213296341Sdelphij return; 214296341Sdelphij } else { 215296341Sdelphij panic("sofree: not queued"); 216296341Sdelphij } 217296341Sdelphij head->so_qlen--; 218296341Sdelphij so->so_state &= ~SS_INCOMP; 219296341Sdelphij so->so_head = NULL; 220296341Sdelphij } 221296341Sdelphij sbrelease(&so->so_snd, so); 222296341Sdelphij sorflush(so); 223296341Sdelphij sodealloc(so); 224296341Sdelphij} 225296341Sdelphij 226296341Sdelphij/* 227296341Sdelphij * Close a socket on last file table reference removal. 228296341Sdelphij * Initiate disconnect if connected. 229296341Sdelphij * Free socket when disconnect complete. 230296341Sdelphij */ 231296341Sdelphijint 232296341Sdelphijsoclose(so) 233296341Sdelphij register struct socket *so; 234296341Sdelphij{ 235296341Sdelphij int s = splnet(); /* conservative */ 236296341Sdelphij int error = 0; 237296341Sdelphij 238296341Sdelphij funsetown(so->so_sigio); 239296341Sdelphij if (so->so_options & SO_ACCEPTCONN) { 240296341Sdelphij struct socket *sp, *sonext; 241296341Sdelphij 242296341Sdelphij sp = TAILQ_FIRST(&so->so_incomp); 243296341Sdelphij for (; sp != NULL; sp = sonext) { 244296341Sdelphij sonext = TAILQ_NEXT(sp, so_list); 245296341Sdelphij (void) soabort(sp); 246296341Sdelphij } 247296341Sdelphij for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) { 248296341Sdelphij sonext = TAILQ_NEXT(sp, so_list); 249296341Sdelphij /* Dequeue from so_comp since sofree() won't do it */ 250296341Sdelphij TAILQ_REMOVE(&so->so_comp, sp, so_list); 251296341Sdelphij so->so_qlen--; 252296341Sdelphij sp->so_state &= ~SS_COMP; 253296341Sdelphij sp->so_head = NULL; 254296341Sdelphij (void) soabort(sp); 255296341Sdelphij } 256296341Sdelphij } 257296341Sdelphij if (so->so_pcb == 0) 258296341Sdelphij goto discard; 259296341Sdelphij if (so->so_state & SS_ISCONNECTED) { 260296341Sdelphij if ((so->so_state & SS_ISDISCONNECTING) == 0) { 261296341Sdelphij error = sodisconnect(so); 262296341Sdelphij if (error) 263296341Sdelphij goto drop; 264296341Sdelphij } 265296341Sdelphij if (so->so_options & SO_LINGER) { 266296341Sdelphij if ((so->so_state & SS_ISDISCONNECTING) && 267296341Sdelphij (so->so_state & SS_NBIO)) 268296341Sdelphij goto drop; 269296341Sdelphij while (so->so_state & SS_ISCONNECTED) { 270296341Sdelphij error = tsleep((caddr_t)&so->so_timeo, 271296341Sdelphij PSOCK | PCATCH, "soclos", so->so_linger * hz); 272296341Sdelphij if (error) 273296341Sdelphij break; 274296341Sdelphij } 275296341Sdelphij } 276296341Sdelphij } 277296341Sdelphijdrop: 278296341Sdelphij if (so->so_pcb) { 279296341Sdelphij int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); 280296341Sdelphij if (error == 0) 281296341Sdelphij error = error2; 282296341Sdelphij } 283296341Sdelphijdiscard: 284296341Sdelphij if (so->so_state & SS_NOFDREF) 285296341Sdelphij panic("soclose: NOFDREF"); 286296341Sdelphij so->so_state |= SS_NOFDREF; 287296341Sdelphij sofree(so); 288296341Sdelphij splx(s); 289296341Sdelphij return (error); 290296341Sdelphij} 291296341Sdelphij 292296341Sdelphij/* 293296341Sdelphij * Must be called at splnet... 294296341Sdelphij */ 295296341Sdelphijint 296296341Sdelphijsoabort(so) 297296341Sdelphij struct socket *so; 298296341Sdelphij{ 299296341Sdelphij 300296341Sdelphij return (*so->so_proto->pr_usrreqs->pru_abort)(so); 301296341Sdelphij} 302296341Sdelphij 303296341Sdelphijint 304296341Sdelphijsoaccept(so, nam) 305296341Sdelphij register struct socket *so; 306296341Sdelphij struct sockaddr **nam; 307296341Sdelphij{ 308296341Sdelphij int s = splnet(); 309296341Sdelphij int error; 310296341Sdelphij 311296341Sdelphij if ((so->so_state & SS_NOFDREF) == 0) 312296341Sdelphij panic("soaccept: !NOFDREF"); 313296341Sdelphij so->so_state &= ~SS_NOFDREF; 314296341Sdelphij if ((so->so_state & SS_ISDISCONNECTED) == 0) 315296341Sdelphij error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 316296341Sdelphij else { 317296341Sdelphij if (nam) 318296341Sdelphij *nam = 0; 319296341Sdelphij error = 0; 320296341Sdelphij } 321296341Sdelphij splx(s); 322296341Sdelphij return (error); 323296341Sdelphij} 324296341Sdelphij 325296341Sdelphijint 326296341Sdelphijsoconnect(so, nam, p) 327296341Sdelphij register struct socket *so; 328296341Sdelphij struct sockaddr *nam; 329296341Sdelphij struct proc *p; 330296341Sdelphij{ 331296341Sdelphij int s; 332296341Sdelphij int error; 333296341Sdelphij 334296341Sdelphij if (so->so_options & SO_ACCEPTCONN) 335296341Sdelphij return (EOPNOTSUPP); 336296341Sdelphij s = splnet(); 337296341Sdelphij /* 338296341Sdelphij * If protocol is connection-based, can only connect once. 339296341Sdelphij * Otherwise, if connected, try to disconnect first. 340296341Sdelphij * This allows user to disconnect by connecting to, e.g., 341296341Sdelphij * a null address. 342296341Sdelphij */ 343296341Sdelphij if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 344296341Sdelphij ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 345296341Sdelphij (error = sodisconnect(so)))) 346296341Sdelphij error = EISCONN; 347296341Sdelphij else 348296341Sdelphij error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); 349296341Sdelphij splx(s); 350296341Sdelphij return (error); 351296341Sdelphij} 352296341Sdelphij 353296341Sdelphijint 354296341Sdelphijsoconnect2(so1, so2) 355296341Sdelphij register struct socket *so1; 356296341Sdelphij struct socket *so2; 357296341Sdelphij{ 358296341Sdelphij int s = splnet(); 359296341Sdelphij int error; 360296341Sdelphij 361296341Sdelphij error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); 362296341Sdelphij splx(s); 363296341Sdelphij return (error); 364296341Sdelphij} 365296341Sdelphij 366296341Sdelphijint 367296341Sdelphijsodisconnect(so) 368296341Sdelphij register struct socket *so; 369296341Sdelphij{ 370296341Sdelphij int s = splnet(); 371296341Sdelphij int error; 372296341Sdelphij 373296341Sdelphij if ((so->so_state & SS_ISCONNECTED) == 0) { 374296341Sdelphij error = ENOTCONN; 375296341Sdelphij goto bad; 376296341Sdelphij } 377296341Sdelphij if (so->so_state & SS_ISDISCONNECTING) { 378296341Sdelphij error = EALREADY; 379296341Sdelphij goto bad; 380296341Sdelphij } 381296341Sdelphij error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 382296341Sdelphijbad: 383296341Sdelphij splx(s); 384296341Sdelphij return (error); 385296341Sdelphij} 386296341Sdelphij 387296341Sdelphij#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 388296341Sdelphij/* 389296341Sdelphij * Send on a socket. 390296341Sdelphij * If send must go all at once and message is larger than 391296341Sdelphij * send buffering, then hard error. 392296341Sdelphij * Lock against other senders. 393296341Sdelphij * If must go all at once and not enough room now, then 394296341Sdelphij * inform user that this would block and do nothing. 395296341Sdelphij * Otherwise, if nonblocking, send as much as possible. 396296341Sdelphij * The data to be sent is described by "uio" if nonzero, 397296341Sdelphij * otherwise by the mbuf chain "top" (which must be null 398296341Sdelphij * if uio is not). Data provided in mbuf chain must be small 399296341Sdelphij * enough to send all at once. 400296341Sdelphij * 401296341Sdelphij * Returns nonzero on error, timeout or signal; callers 402296341Sdelphij * must check for short counts if EINTR/ERESTART are returned. 403296341Sdelphij * Data and control buffers are freed on return. 404296341Sdelphij */ 405296341Sdelphijint 406296341Sdelphijsosend(so, addr, uio, top, control, flags, p) 407296341Sdelphij register struct socket *so; 408296341Sdelphij struct sockaddr *addr; 409296341Sdelphij struct uio *uio; 410296341Sdelphij struct mbuf *top; 411296341Sdelphij struct mbuf *control; 412296341Sdelphij int flags; 413296341Sdelphij struct proc *p; 414296341Sdelphij{ 415296341Sdelphij struct mbuf **mp; 416296341Sdelphij register struct mbuf *m; 417296341Sdelphij register long space, len, resid; 418296341Sdelphij int clen = 0, error, s, dontroute, mlen; 419296341Sdelphij int atomic = sosendallatonce(so) || top; 420296341Sdelphij 421296341Sdelphij if (uio) 422296341Sdelphij resid = uio->uio_resid; 423296341Sdelphij else 424296341Sdelphij resid = top->m_pkthdr.len; 425296341Sdelphij /* 426296341Sdelphij * In theory resid should be unsigned. 427296341Sdelphij * However, space must be signed, as it might be less than 0 428296341Sdelphij * if we over-committed, and we must use a signed comparison 429296341Sdelphij * of space and resid. On the other hand, a negative resid 430296341Sdelphij * causes us to loop sending 0-length segments to the protocol. 431296341Sdelphij * 432296341Sdelphij * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 433296341Sdelphij * type sockets since that's an error. 434296341Sdelphij */ 435296341Sdelphij if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 436296341Sdelphij error = EINVAL; 437296341Sdelphij goto out; 438296341Sdelphij } 439296341Sdelphij 440296341Sdelphij dontroute = 441296341Sdelphij (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 442296341Sdelphij (so->so_proto->pr_flags & PR_ATOMIC); 443296341Sdelphij if (p) 444296341Sdelphij p->p_stats->p_ru.ru_msgsnd++; 445296341Sdelphij if (control) 446296341Sdelphij clen = control->m_len; 447296341Sdelphij#define snderr(errno) { error = errno; splx(s); goto release; } 448296341Sdelphij 449296341Sdelphijrestart: 450296341Sdelphij error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 451296341Sdelphij if (error) 452296341Sdelphij goto out; 453296341Sdelphij do { 454296341Sdelphij s = splnet(); 455296341Sdelphij if (so->so_state & SS_CANTSENDMORE) 456296341Sdelphij snderr(EPIPE); 457296341Sdelphij if (so->so_error) { 458296341Sdelphij error = so->so_error; 459296341Sdelphij so->so_error = 0; 460296341Sdelphij splx(s); 461296341Sdelphij goto release; 462296341Sdelphij } 463296341Sdelphij if ((so->so_state & SS_ISCONNECTED) == 0) { 464296341Sdelphij /* 465296341Sdelphij * `sendto' and `sendmsg' is allowed on a connection- 466296341Sdelphij * based socket if it supports implied connect. 467296341Sdelphij * Return ENOTCONN if not connected and no address is 468296341Sdelphij * supplied. 469296341Sdelphij */ 470296341Sdelphij if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 471296341Sdelphij (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 472296341Sdelphij if ((so->so_state & SS_ISCONFIRMING) == 0 && 473109998Smarkm !(resid == 0 && clen != 0)) 474296341Sdelphij snderr(ENOTCONN); 475296341Sdelphij } else if (addr == 0) 476296341Sdelphij snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 477109998Smarkm ENOTCONN : EDESTADDRREQ); 478296341Sdelphij } 479296341Sdelphij space = sbspace(&so->so_snd); 480296341Sdelphij if (flags & MSG_OOB) 481296341Sdelphij space += 1024; 482296341Sdelphij if ((atomic && resid > so->so_snd.sb_hiwat) || 483296341Sdelphij clen > so->so_snd.sb_hiwat) 484296341Sdelphij snderr(EMSGSIZE); 485296341Sdelphij if (space < resid + clen && uio && 486296341Sdelphij (atomic || space < so->so_snd.sb_lowat || space < clen)) { 487296341Sdelphij if (so->so_state & SS_NBIO) 488296341Sdelphij snderr(EWOULDBLOCK); 489296341Sdelphij sbunlock(&so->so_snd); 490296341Sdelphij error = sbwait(&so->so_snd); 491296341Sdelphij splx(s); 492296341Sdelphij if (error) 493296341Sdelphij goto out; 494296341Sdelphij goto restart; 495296341Sdelphij } 496296341Sdelphij splx(s); 497296341Sdelphij mp = ⊤ 498296341Sdelphij space -= clen; 499296341Sdelphij do { 500296341Sdelphij if (uio == NULL) { 501296341Sdelphij /* 502296341Sdelphij * Data is prepackaged in "top". 503296341Sdelphij */ 504296341Sdelphij resid = 0; 505296341Sdelphij if (flags & MSG_EOR) 506296341Sdelphij top->m_flags |= M_EOR; 507296341Sdelphij } else do { 508296341Sdelphij if (top == 0) { 509296341Sdelphij MGETHDR(m, M_WAIT, MT_DATA); 510296341Sdelphij mlen = MHLEN; 511296341Sdelphij m->m_pkthdr.len = 0; 512296341Sdelphij m->m_pkthdr.rcvif = (struct ifnet *)0; 513296341Sdelphij } else { 514296341Sdelphij MGET(m, M_WAIT, MT_DATA); 515296341Sdelphij mlen = MLEN; 516296341Sdelphij } 517296341Sdelphij if (resid >= MINCLSIZE) { 518296341Sdelphij MCLGET(m, M_WAIT); 519296341Sdelphij if ((m->m_flags & M_EXT) == 0) 520296341Sdelphij goto nopages; 521296341Sdelphij mlen = MCLBYTES; 522296341Sdelphij len = min(min(mlen, resid), space); 523296341Sdelphij } else { 524296341Sdelphijnopages: 525296341Sdelphij len = min(min(mlen, resid), space); 526296341Sdelphij /* 527296341Sdelphij * For datagram protocols, leave room 528296341Sdelphij * for protocol headers in first mbuf. 529296341Sdelphij */ 530296341Sdelphij if (atomic && top == 0 && len < mlen) 531296341Sdelphij MH_ALIGN(m, len); 532296341Sdelphij } 533296341Sdelphij space -= len; 534296341Sdelphij error = uiomove(mtod(m, caddr_t), (int)len, uio); 535296341Sdelphij resid = uio->uio_resid; 536296341Sdelphij m->m_len = len; 537296341Sdelphij *mp = m; 538296341Sdelphij top->m_pkthdr.len += len; 539296341Sdelphij if (error) 540296341Sdelphij goto release; 541296341Sdelphij mp = &m->m_next; 542296341Sdelphij if (resid <= 0) { 543296341Sdelphij if (flags & MSG_EOR) 544296341Sdelphij top->m_flags |= M_EOR; 545296341Sdelphij break; 546296341Sdelphij } 547296341Sdelphij } while (space > 0 && atomic); 548296341Sdelphij if (dontroute) 549296341Sdelphij so->so_options |= SO_DONTROUTE; 550296341Sdelphij s = splnet(); /* XXX */ 551296341Sdelphij /* 552296341Sdelphij * XXX all the SS_CANTSENDMORE checks previously 553296341Sdelphij * done could be out of date. We could have recieved 554296341Sdelphij * a reset packet in an interrupt or maybe we slept 555296341Sdelphij * while doing page faults in uiomove() etc. We could 556296341Sdelphij * probably recheck again inside the splnet() protection 557296341Sdelphij * here, but there are probably other places that this 558296341Sdelphij * also happens. We must rethink this. 559296341Sdelphij */ 560296341Sdelphij error = (*so->so_proto->pr_usrreqs->pru_send)(so, 561296341Sdelphij (flags & MSG_OOB) ? PRUS_OOB : 562296341Sdelphij /* 563296341Sdelphij * If the user set MSG_EOF, the protocol 564296341Sdelphij * understands this flag and nothing left to 565296341Sdelphij * send then use PRU_SEND_EOF instead of PRU_SEND. 566296341Sdelphij */ 567296341Sdelphij ((flags & MSG_EOF) && 568296341Sdelphij (so->so_proto->pr_flags & PR_IMPLOPCL) && 569296341Sdelphij (resid <= 0)) ? 570296341Sdelphij PRUS_EOF : 571296341Sdelphij /* If there is more to send set PRUS_MORETOCOME */ 572109998Smarkm (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, 573296341Sdelphij top, addr, control, p); 574296341Sdelphij splx(s); 575296341Sdelphij if (dontroute) 576296341Sdelphij so->so_options &= ~SO_DONTROUTE; 577109998Smarkm clen = 0; 578296341Sdelphij control = 0; 579296341Sdelphij top = 0; 580296341Sdelphij mp = ⊤ 581296341Sdelphij if (error) 582109998Smarkm goto release; 583296341Sdelphij } while (resid && space > 0); 584296341Sdelphij } while (resid); 585109998Smarkm 586296341Sdelphijrelease: 587296341Sdelphij sbunlock(&so->so_snd); 588296341Sdelphijout: 589296341Sdelphij if (top) 590296341Sdelphij m_freem(top); 591296341Sdelphij if (control) 592296341Sdelphij m_freem(control); 593296341Sdelphij return (error); 594296341Sdelphij} 595296341Sdelphij 596296341Sdelphij/* 597296341Sdelphij * Implement receive operations on a socket. 598296341Sdelphij * We depend on the way that records are added to the sockbuf 599109998Smarkm * by sbappend*. In particular, each record (mbufs linked through m_next) 600296341Sdelphij * must begin with an address if the protocol so specifies, 601296341Sdelphij * followed by an optional mbuf or mbufs containing ancillary data, 602296341Sdelphij * and then zero or more mbufs of data. 603296341Sdelphij * In order to avoid blocking network interrupts for the entire time here, 604296341Sdelphij * we splx() while doing the actual copy to user space. 605109998Smarkm * Although the sockbuf is locked, new data may still be appended, 606296341Sdelphij * and thus we must maintain consistency of the sockbuf during that time. 607296341Sdelphij * 608296341Sdelphij * The caller may receive the data as a single mbuf chain by supplying 609296341Sdelphij * an mbuf **mp0 for use in returning the chain. The uio is then used 610296341Sdelphij * only for the count in uio_resid. 611296341Sdelphij */ 612296341Sdelphijint 613296341Sdelphijsoreceive(so, psa, uio, mp0, controlp, flagsp) 614296341Sdelphij register struct socket *so; 615296341Sdelphij struct sockaddr **psa; 616296341Sdelphij struct uio *uio; 617296341Sdelphij struct mbuf **mp0; 618296341Sdelphij struct mbuf **controlp; 619296341Sdelphij int *flagsp; 620296341Sdelphij{ 621296341Sdelphij register struct mbuf *m, **mp; 622296341Sdelphij register int flags, len, error, s, offset; 623296341Sdelphij struct protosw *pr = so->so_proto; 624296341Sdelphij struct mbuf *nextrecord; 625296341Sdelphij int moff, type = 0; 626296341Sdelphij int orig_resid = uio->uio_resid; 627296341Sdelphij 628296341Sdelphij mp = mp0; 629296341Sdelphij if (psa) 630109998Smarkm *psa = 0; 631296341Sdelphij if (controlp) 632109998Smarkm *controlp = 0; 633296341Sdelphij if (flagsp) 634296341Sdelphij flags = *flagsp &~ MSG_EOR; 635296341Sdelphij else 636296341Sdelphij flags = 0; 637296341Sdelphij if (flags & MSG_OOB) { 638296341Sdelphij m = m_get(M_WAIT, MT_DATA); 639296341Sdelphij error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 640296341Sdelphij if (error) 641296341Sdelphij goto bad; 642296341Sdelphij do { 643296341Sdelphij error = uiomove(mtod(m, caddr_t), 644109998Smarkm (int) min(uio->uio_resid, m->m_len), uio); 645296341Sdelphij m = m_free(m); 646296341Sdelphij } while (uio->uio_resid && error == 0 && m); 647296341Sdelphijbad: 648296341Sdelphij if (m) 649109998Smarkm m_freem(m); 650296341Sdelphij return (error); 651296341Sdelphij } 652109998Smarkm if (mp) 653296341Sdelphij *mp = (struct mbuf *)0; 654296341Sdelphij if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) 655296341Sdelphij (*pr->pr_usrreqs->pru_rcvd)(so, 0); 656296341Sdelphij 657296341Sdelphijrestart: 658296341Sdelphij error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 659296341Sdelphij if (error) 660296341Sdelphij return (error); 661296341Sdelphij s = splnet(); 662296341Sdelphij 663296341Sdelphij m = so->so_rcv.sb_mb; 664296341Sdelphij /* 665296341Sdelphij * If we have less data than requested, block awaiting more 666296341Sdelphij * (subject to any timeout) if: 667296341Sdelphij * 1. the current count is less than the low water mark, or 668296341Sdelphij * 2. MSG_WAITALL is set, and it is possible to do the entire 669296341Sdelphij * receive operation at once if we block (resid <= hiwat). 670296341Sdelphij * 3. MSG_DONTWAIT is not set 671296341Sdelphij * If MSG_WAITALL is set but resid is larger than the receive buffer, 672238405Sjkim * we have to do the receive in sections, and thus risk returning 673296341Sdelphij * a short count if a timeout or signal occurs after we start. 674296341Sdelphij */ 675296341Sdelphij if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && 676296341Sdelphij so->so_rcv.sb_cc < uio->uio_resid) && 677296341Sdelphij (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 678296341Sdelphij ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 679109998Smarkm m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 680296341Sdelphij KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); 681296341Sdelphij if (so->so_error) { 682109998Smarkm if (m) 683296341Sdelphij goto dontblock; 684296341Sdelphij error = so->so_error; 685296341Sdelphij if ((flags & MSG_PEEK) == 0) 686296341Sdelphij so->so_error = 0; 687296341Sdelphij goto release; 688296341Sdelphij } 689296341Sdelphij if (so->so_state & SS_CANTRCVMORE) { 690296341Sdelphij if (m) 691296341Sdelphij goto dontblock; 692109998Smarkm else 693296341Sdelphij goto release; 694296341Sdelphij } 695296341Sdelphij for (; m; m = m->m_next) 696296341Sdelphij if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 697296341Sdelphij m = so->so_rcv.sb_mb; 698109998Smarkm goto dontblock; 699296341Sdelphij } 700296341Sdelphij if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 701296341Sdelphij (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 702296341Sdelphij error = ENOTCONN; 703296341Sdelphij goto release; 704296341Sdelphij } 705296341Sdelphij if (uio->uio_resid == 0) 706109998Smarkm goto release; 707296341Sdelphij if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { 708296341Sdelphij error = EWOULDBLOCK; 709296341Sdelphij goto release; 710296341Sdelphij } 711296341Sdelphij sbunlock(&so->so_rcv); 712296341Sdelphij error = sbwait(&so->so_rcv); 713296341Sdelphij splx(s); 714296341Sdelphij if (error) 715296341Sdelphij return (error); 716296341Sdelphij goto restart; 717296341Sdelphij } 718296341Sdelphijdontblock: 719296341Sdelphij if (uio->uio_procp) 720296341Sdelphij uio->uio_procp->p_stats->p_ru.ru_msgrcv++; 721296341Sdelphij nextrecord = m->m_nextpkt; 722296341Sdelphij if (pr->pr_flags & PR_ADDR) { 723296341Sdelphij KASSERT(m->m_type == MT_SONAME, ("receive 1a")); 724296341Sdelphij orig_resid = 0; 725296341Sdelphij if (psa) 726296341Sdelphij *psa = dup_sockaddr(mtod(m, struct sockaddr *), 727296341Sdelphij mp0 == 0); 728296341Sdelphij if (flags & MSG_PEEK) { 729296341Sdelphij m = m->m_next; 730296341Sdelphij } else { 731296341Sdelphij sbfree(&so->so_rcv, m); 732296341Sdelphij MFREE(m, so->so_rcv.sb_mb); 733296341Sdelphij m = so->so_rcv.sb_mb; 734296341Sdelphij } 735109998Smarkm } 736296341Sdelphij while (m && m->m_type == MT_CONTROL && error == 0) { 737296341Sdelphij if (flags & MSG_PEEK) { 738296341Sdelphij if (controlp) 739296341Sdelphij *controlp = m_copy(m, 0, m->m_len); 740109998Smarkm m = m->m_next; 741296341Sdelphij } else { 742109998Smarkm sbfree(&so->so_rcv, m); 743296341Sdelphij if (controlp) { 744296341Sdelphij if (pr->pr_domain->dom_externalize && 745296341Sdelphij mtod(m, struct cmsghdr *)->cmsg_type == 746296341Sdelphij SCM_RIGHTS) 747296341Sdelphij error = (*pr->pr_domain->dom_externalize)(m); 748296341Sdelphij *controlp = m; 749296341Sdelphij so->so_rcv.sb_mb = m->m_next; 750296341Sdelphij m->m_next = 0; 751296341Sdelphij m = so->so_rcv.sb_mb; 752109998Smarkm } else { 753296341Sdelphij MFREE(m, so->so_rcv.sb_mb); 754109998Smarkm m = so->so_rcv.sb_mb; 755296341Sdelphij } 756296341Sdelphij } 757296341Sdelphij if (controlp) { 758296341Sdelphij orig_resid = 0; 759296341Sdelphij controlp = &(*controlp)->m_next; 760296341Sdelphij } 761296341Sdelphij } 762296341Sdelphij if (m) { 763109998Smarkm if ((flags & MSG_PEEK) == 0) 764296341Sdelphij m->m_nextpkt = nextrecord; 765296341Sdelphij type = m->m_type; 766109998Smarkm if (type == MT_OOBDATA) 767296341Sdelphij flags |= MSG_OOB; 768296341Sdelphij } 769296341Sdelphij moff = 0; 770296341Sdelphij offset = 0; 771296341Sdelphij while (m && uio->uio_resid > 0 && error == 0) { 772296341Sdelphij if (m->m_type == MT_OOBDATA) { 773296341Sdelphij if (type != MT_OOBDATA) 774296341Sdelphij break; 775296341Sdelphij } else if (type == MT_OOBDATA) 776296341Sdelphij break; 777296341Sdelphij else 778296341Sdelphij KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 779296341Sdelphij ("receive 3")); 780296341Sdelphij so->so_state &= ~SS_RCVATMARK; 781296341Sdelphij len = uio->uio_resid; 782296341Sdelphij if (so->so_oobmark && len > so->so_oobmark - offset) 783109998Smarkm len = so->so_oobmark - offset; 784296341Sdelphij if (len > m->m_len - moff) 785296341Sdelphij len = m->m_len - moff; 786296341Sdelphij /* 787296341Sdelphij * If mp is set, just pass back the mbufs. 788296341Sdelphij * Otherwise copy them out via the uio, then free. 789296341Sdelphij * Sockbuf must be consistent here (points to current mbuf, 790296341Sdelphij * it points to next record) when we drop priority; 791296341Sdelphij * we must note any additions to the sockbuf when we 792296341Sdelphij * block interrupts again. 793296341Sdelphij */ 794109998Smarkm if (mp == 0) { 795296341Sdelphij splx(s); 796109998Smarkm error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); 797296341Sdelphij s = splnet(); 798296341Sdelphij if (error) 799296341Sdelphij goto release; 800296341Sdelphij } else 801109998Smarkm uio->uio_resid -= len; 802296341Sdelphij if (len == m->m_len - moff) { 803296341Sdelphij if (m->m_flags & M_EOR) 804296341Sdelphij flags |= MSG_EOR; 805296341Sdelphij if (flags & MSG_PEEK) { 806296341Sdelphij m = m->m_next; 807296341Sdelphij moff = 0; 808296341Sdelphij } else { 809296341Sdelphij nextrecord = m->m_nextpkt; 810296341Sdelphij sbfree(&so->so_rcv, m); 811109998Smarkm if (mp) { 812296341Sdelphij *mp = m; 813296341Sdelphij mp = &m->m_next; 814296341Sdelphij so->so_rcv.sb_mb = m = m->m_next; 815109998Smarkm *mp = (struct mbuf *)0; 816296341Sdelphij } else { 817296341Sdelphij MFREE(m, so->so_rcv.sb_mb); 818296341Sdelphij m = so->so_rcv.sb_mb; 819296341Sdelphij } 820296341Sdelphij if (m) 821109998Smarkm m->m_nextpkt = nextrecord; 822296341Sdelphij } 823109998Smarkm } else { 824296341Sdelphij if (flags & MSG_PEEK) 825296341Sdelphij moff += len; 826109998Smarkm else { 827296341Sdelphij if (mp) 828109998Smarkm *mp = m_copym(m, 0, len, M_WAIT); 829296341Sdelphij m->m_data += len; 830296341Sdelphij m->m_len -= len; 831296341Sdelphij so->so_rcv.sb_cc -= len; 832296341Sdelphij } 833296341Sdelphij } 834296341Sdelphij if (so->so_oobmark) { 835296341Sdelphij if ((flags & MSG_PEEK) == 0) { 836296341Sdelphij so->so_oobmark -= len; 837296341Sdelphij if (so->so_oobmark == 0) { 838296341Sdelphij so->so_state |= SS_RCVATMARK; 839296341Sdelphij break; 840296341Sdelphij } 841296341Sdelphij } else { 842296341Sdelphij offset += len; 843296341Sdelphij if (offset == so->so_oobmark) 844296341Sdelphij break; 845296341Sdelphij } 846296341Sdelphij } 847296341Sdelphij if (flags & MSG_EOR) 848296341Sdelphij break; 849296341Sdelphij /* 850296341Sdelphij * If the MSG_WAITALL flag is set (for non-atomic socket), 851109998Smarkm * we must not quit until "uio->uio_resid == 0" or an error 852296341Sdelphij * termination. If a signal/timeout occurs, return 853296341Sdelphij * with a short count but without error. 854296341Sdelphij * Keep sockbuf locked against other readers. 855296341Sdelphij */ 856296341Sdelphij while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && 857296341Sdelphij !sosendallatonce(so) && !nextrecord) { 858296341Sdelphij if (so->so_error || so->so_state & SS_CANTRCVMORE) 859296341Sdelphij break; 860109998Smarkm error = sbwait(&so->so_rcv); 861109998Smarkm if (error) { 862296341Sdelphij sbunlock(&so->so_rcv); 863296341Sdelphij splx(s); 864296341Sdelphij return (0); 865296341Sdelphij } 866296341Sdelphij m = so->so_rcv.sb_mb; 867296341Sdelphij if (m) 868296341Sdelphij nextrecord = m->m_nextpkt; 869296341Sdelphij } 870296341Sdelphij } 871296341Sdelphij 872296341Sdelphij if (m && pr->pr_flags & PR_ATOMIC) { 873296341Sdelphij flags |= MSG_TRUNC; 874296341Sdelphij if ((flags & MSG_PEEK) == 0) 875296341Sdelphij (void) sbdroprecord(&so->so_rcv); 876296341Sdelphij } 877296341Sdelphij if ((flags & MSG_PEEK) == 0) { 878296341Sdelphij if (m == 0) 879296341Sdelphij so->so_rcv.sb_mb = nextrecord; 880296341Sdelphij if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 881109998Smarkm (*pr->pr_usrreqs->pru_rcvd)(so, flags); 882296341Sdelphij } 883296341Sdelphij if (orig_resid == uio->uio_resid && orig_resid && 884296341Sdelphij (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 885296341Sdelphij sbunlock(&so->so_rcv); 886109998Smarkm splx(s); 887296341Sdelphij goto restart; 888296341Sdelphij } 889296341Sdelphij 890296341Sdelphij if (flagsp) 891296341Sdelphij *flagsp |= flags; 892296341Sdelphijrelease: 893296341Sdelphij sbunlock(&so->so_rcv); 894296341Sdelphij splx(s); 895296341Sdelphij return (error); 896296341Sdelphij} 897296341Sdelphij 898296341Sdelphijint 899296341Sdelphijsoshutdown(so, how) 900296341Sdelphij register struct socket *so; 901296341Sdelphij register int how; 902296341Sdelphij{ 903296341Sdelphij register struct protosw *pr = so->so_proto; 904296341Sdelphij 905296341Sdelphij how++; 906296341Sdelphij if (how & FREAD) 907296341Sdelphij sorflush(so); 908296341Sdelphij if (how & FWRITE) 909296341Sdelphij return ((*pr->pr_usrreqs->pru_shutdown)(so)); 910296341Sdelphij return (0); 911296341Sdelphij} 912296341Sdelphij 913296341Sdelphijvoid 914296341Sdelphijsorflush(so) 915296341Sdelphij register struct socket *so; 916296341Sdelphij{ 917109998Smarkm register struct sockbuf *sb = &so->so_rcv; 918296341Sdelphij register struct protosw *pr = so->so_proto; 919296341Sdelphij register int s; 920296341Sdelphij struct sockbuf asb; 921296341Sdelphij 922109998Smarkm sb->sb_flags |= SB_NOINTR; 923109998Smarkm (void) sblock(sb, M_WAITOK); 924296341Sdelphij s = splimp(); 925296341Sdelphij socantrcvmore(so); 926296341Sdelphij sbunlock(sb); 927296341Sdelphij asb = *sb; 928296341Sdelphij bzero((caddr_t)sb, sizeof (*sb)); 929296341Sdelphij splx(s); 930296341Sdelphij if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) 931109998Smarkm (*pr->pr_domain->dom_dispose)(asb.sb_mb); 932296341Sdelphij sbrelease(&asb, so); 933109998Smarkm} 934296341Sdelphij 935109998Smarkm/* 936296341Sdelphij * Perhaps this routine, and sooptcopyout(), below, ought to come in 937296341Sdelphij * an additional variant to handle the case where the option value needs 938296341Sdelphij * to be some kind of integer, but not a specific size. 939109998Smarkm * In addition to their use here, these functions are also called by the 940296341Sdelphij * protocol-level pr_ctloutput() routines. 941296341Sdelphij */ 942296341Sdelphijint 943296341Sdelphijsooptcopyin(sopt, buf, len, minlen) 944109998Smarkm struct sockopt *sopt; 945296341Sdelphij void *buf; 946296341Sdelphij size_t len; 947296341Sdelphij size_t minlen; 948296341Sdelphij{ 949296341Sdelphij size_t valsize; 950109998Smarkm 951296341Sdelphij /* 952296341Sdelphij * If the user gives us more than we wanted, we ignore it, 953296341Sdelphij * but if we don't get the minimum length the caller 954296341Sdelphij * wants, we return EINVAL. On success, sopt->sopt_valsize 955296341Sdelphij * is set to however much we actually retrieved. 956296341Sdelphij */ 957296341Sdelphij if ((valsize = sopt->sopt_valsize) < minlen) 958296341Sdelphij return EINVAL; 959296341Sdelphij if (valsize > len) 960109998Smarkm sopt->sopt_valsize = valsize = len; 961296341Sdelphij 962296341Sdelphij if (sopt->sopt_p != 0) 963296341Sdelphij return (copyin(sopt->sopt_val, buf, valsize)); 964109998Smarkm 965296341Sdelphij bcopy(sopt->sopt_val, buf, valsize); 966296341Sdelphij return 0; 967296341Sdelphij} 968296341Sdelphij 969296341Sdelphijint 970109998Smarkmsosetopt(so, sopt) 971296341Sdelphij struct socket *so; 972296341Sdelphij struct sockopt *sopt; 973109998Smarkm{ 974296341Sdelphij int error, optval; 975296341Sdelphij struct linger l; 976109998Smarkm struct timeval tv; 977296341Sdelphij u_long val; 978296341Sdelphij 979296341Sdelphij error = 0; 980296341Sdelphij if (sopt->sopt_level != SOL_SOCKET) { 981109998Smarkm if (so->so_proto && so->so_proto->pr_ctloutput) 982296341Sdelphij return ((*so->so_proto->pr_ctloutput) 983296341Sdelphij (so, sopt)); 984109998Smarkm error = ENOPROTOOPT; 985296341Sdelphij } else { 986296341Sdelphij switch (sopt->sopt_name) { 987296341Sdelphij case SO_LINGER: 988296341Sdelphij error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 989296341Sdelphij if (error) 990296341Sdelphij goto bad; 991296341Sdelphij 992296341Sdelphij so->so_linger = l.l_linger; 993296341Sdelphij if (l.l_onoff) 994109998Smarkm so->so_options |= SO_LINGER; 995296341Sdelphij else 996109998Smarkm so->so_options &= ~SO_LINGER; 997296341Sdelphij break; 998296341Sdelphij 999296341Sdelphij case SO_DEBUG: 1000296341Sdelphij case SO_KEEPALIVE: 1001296341Sdelphij case SO_DONTROUTE: 1002109998Smarkm case SO_USELOOPBACK: 1003296341Sdelphij case SO_BROADCAST: 1004296341Sdelphij case SO_REUSEADDR: 1005296341Sdelphij case SO_REUSEPORT: 1006296341Sdelphij case SO_OOBINLINE: 1007109998Smarkm case SO_TIMESTAMP: 1008296341Sdelphij error = sooptcopyin(sopt, &optval, sizeof optval, 1009296341Sdelphij sizeof optval); 1010296341Sdelphij if (error) 1011296341Sdelphij goto bad; 1012296341Sdelphij if (optval) 1013296341Sdelphij so->so_options |= sopt->sopt_name; 1014296341Sdelphij else 1015296341Sdelphij so->so_options &= ~sopt->sopt_name; 1016296341Sdelphij break; 1017109998Smarkm 1018296341Sdelphij case SO_SNDBUF: 1019109998Smarkm case SO_RCVBUF: 1020296341Sdelphij case SO_SNDLOWAT: 1021296341Sdelphij case SO_RCVLOWAT: 1022296341Sdelphij error = sooptcopyin(sopt, &optval, sizeof optval, 1023296341Sdelphij sizeof optval); 1024296341Sdelphij if (error) 1025296341Sdelphij goto bad; 1026296341Sdelphij 1027296341Sdelphij /* 1028296341Sdelphij * Values < 1 make no sense for any of these 1029238405Sjkim * options, so disallow them. 1030296341Sdelphij */ 1031296341Sdelphij if (optval < 1) { 1032296341Sdelphij error = EINVAL; 1033296341Sdelphij goto bad; 1034296341Sdelphij } 1035296341Sdelphij 1036296341Sdelphij switch (sopt->sopt_name) { 1037296341Sdelphij case SO_SNDBUF: 1038296341Sdelphij case SO_RCVBUF: 1039296341Sdelphij if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 1040296341Sdelphij &so->so_snd : &so->so_rcv, (u_long)optval, 1041296341Sdelphij so, curproc) == 0) { 1042296341Sdelphij error = ENOBUFS; 1043296341Sdelphij goto bad; 1044296341Sdelphij } 1045296341Sdelphij break; 1046296341Sdelphij 1047296341Sdelphij /* 1048296341Sdelphij * Make sure the low-water is never greater than 1049296341Sdelphij * the high-water. 1050296341Sdelphij */ 1051296341Sdelphij case SO_SNDLOWAT: 1052296341Sdelphij so->so_snd.sb_lowat = 1053296341Sdelphij (optval > so->so_snd.sb_hiwat) ? 1054296341Sdelphij so->so_snd.sb_hiwat : optval; 1055296341Sdelphij break; 1056296341Sdelphij case SO_RCVLOWAT: 1057296341Sdelphij so->so_rcv.sb_lowat = 1058296341Sdelphij (optval > so->so_rcv.sb_hiwat) ? 1059296341Sdelphij so->so_rcv.sb_hiwat : optval; 1060296341Sdelphij break; 1061296341Sdelphij } 1062296341Sdelphij break; 1063296341Sdelphij 1064296341Sdelphij case SO_SNDTIMEO: 1065296341Sdelphij case SO_RCVTIMEO: 1066296341Sdelphij error = sooptcopyin(sopt, &tv, sizeof tv, 1067296341Sdelphij sizeof tv); 1068296341Sdelphij if (error) 1069238405Sjkim goto bad; 1070296341Sdelphij 1071238405Sjkim /* assert(hz > 0); */ 1072296341Sdelphij if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz || 1073109998Smarkm tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 1074296341Sdelphij error = EDOM; 1075109998Smarkm goto bad; 1076296341Sdelphij } 1077296341Sdelphij /* assert(tick > 0); */ 1078296341Sdelphij /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */ 1079296341Sdelphij val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; 1080296341Sdelphij if (val > SHRT_MAX) { 1081296341Sdelphij error = EDOM; 1082109998Smarkm goto bad; 1083296341Sdelphij } 1084109998Smarkm 1085127128Snectar switch (sopt->sopt_name) { 1086296341Sdelphij case SO_SNDTIMEO: 1087296341Sdelphij so->so_snd.sb_timeo = val; 1088296341Sdelphij break; 1089296341Sdelphij case SO_RCVTIMEO: 1090296341Sdelphij so->so_rcv.sb_timeo = val; 1091296341Sdelphij break; 1092296341Sdelphij } 1093296341Sdelphij break; 1094296341Sdelphij 1095296341Sdelphij default: 1096296341Sdelphij error = ENOPROTOOPT; 1097296341Sdelphij break; 1098296341Sdelphij } 1099296341Sdelphij if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1100296341Sdelphij (void) ((*so->so_proto->pr_ctloutput) 1101296341Sdelphij (so, sopt)); 1102296341Sdelphij } 1103296341Sdelphij } 1104296341Sdelphijbad: 1105109998Smarkm return (error); 1106109998Smarkm} 1107109998Smarkm 1108109998Smarkm/* Helper routine for getsockopt */ 1109296341Sdelphijint 1110296341Sdelphijsooptcopyout(sopt, buf, len) 1111296341Sdelphij struct sockopt *sopt; 1112296341Sdelphij void *buf; 1113296341Sdelphij size_t len; 1114296341Sdelphij{ 1115296341Sdelphij int error; 1116296341Sdelphij size_t valsize; 1117296341Sdelphij 1118296341Sdelphij error = 0; 1119296341Sdelphij 1120296341Sdelphij /* 1121296341Sdelphij * Documented get behavior is that we always return a value, 1122296341Sdelphij * possibly truncated to fit in the user's buffer. 1123296341Sdelphij * Traditional behavior is that we always tell the user 1124109998Smarkm * precisely how much we copied, rather than something useful 1125296341Sdelphij * like the total amount we had available for her. 1126296341Sdelphij * Note that this interface is not idempotent; the entire answer must 1127296341Sdelphij * generated ahead of time. 1128296341Sdelphij */ 1129296341Sdelphij valsize = min(len, sopt->sopt_valsize); 1130109998Smarkm sopt->sopt_valsize = valsize; 1131296341Sdelphij if (sopt->sopt_val != 0) { 1132109998Smarkm if (sopt->sopt_p != 0) 1133296341Sdelphij error = copyout(buf, sopt->sopt_val, valsize); 1134296341Sdelphij else 1135296341Sdelphij bcopy(buf, sopt->sopt_val, valsize); 1136296341Sdelphij } 1137296341Sdelphij return error; 1138109998Smarkm} 1139296341Sdelphij 1140296341Sdelphijint 1141296341Sdelphijsogetopt(so, sopt) 1142296341Sdelphij struct socket *so; 1143296341Sdelphij struct sockopt *sopt; 1144296341Sdelphij{ 1145296341Sdelphij int error, optval; 1146109998Smarkm struct linger l; 1147296341Sdelphij struct timeval tv; 1148296341Sdelphij 1149296341Sdelphij error = 0; 1150296341Sdelphij if (sopt->sopt_level != SOL_SOCKET) { 1151296341Sdelphij if (so->so_proto && so->so_proto->pr_ctloutput) { 1152109998Smarkm return ((*so->so_proto->pr_ctloutput) 1153296341Sdelphij (so, sopt)); 1154296341Sdelphij } else 1155109998Smarkm return (ENOPROTOOPT); 1156296341Sdelphij } else { 1157296341Sdelphij switch (sopt->sopt_name) { 1158296341Sdelphij case SO_LINGER: 1159296341Sdelphij l.l_onoff = so->so_options & SO_LINGER; 1160296341Sdelphij l.l_linger = so->so_linger; 1161296341Sdelphij error = sooptcopyout(sopt, &l, sizeof l); 1162296341Sdelphij break; 1163296341Sdelphij 1164296341Sdelphij case SO_USELOOPBACK: 1165296341Sdelphij case SO_DONTROUTE: 1166296341Sdelphij case SO_DEBUG: 1167296341Sdelphij case SO_KEEPALIVE: 1168296341Sdelphij case SO_REUSEADDR: 1169296341Sdelphij case SO_REUSEPORT: 1170296341Sdelphij case SO_BROADCAST: 1171296341Sdelphij case SO_OOBINLINE: 1172109998Smarkm case SO_TIMESTAMP: 1173296341Sdelphij optval = so->so_options & sopt->sopt_name; 1174109998Smarkminteger: 1175296341Sdelphij error = sooptcopyout(sopt, &optval, sizeof optval); 1176109998Smarkm break; 1177296341Sdelphij 1178296341Sdelphij case SO_TYPE: 1179296341Sdelphij optval = so->so_type; 1180296341Sdelphij goto integer; 1181109998Smarkm 1182296341Sdelphij case SO_ERROR: 1183109998Smarkm optval = so->so_error; 1184296341Sdelphij so->so_error = 0; 1185109998Smarkm goto integer; 1186296341Sdelphij 1187109998Smarkm case SO_SNDBUF: 1188109998Smarkm optval = so->so_snd.sb_hiwat; 1189296341Sdelphij goto integer; 1190296341Sdelphij 1191296341Sdelphij case SO_RCVBUF: 1192296341Sdelphij optval = so->so_rcv.sb_hiwat; 1193296341Sdelphij goto integer; 1194296341Sdelphij 1195296341Sdelphij case SO_SNDLOWAT: 1196296341Sdelphij optval = so->so_snd.sb_lowat; 1197296341Sdelphij goto integer; 1198296341Sdelphij 1199296341Sdelphij case SO_RCVLOWAT: 1200109998Smarkm optval = so->so_rcv.sb_lowat; 1201194206Ssimon goto integer; 1202296341Sdelphij 1203296341Sdelphij case SO_SNDTIMEO: 1204296341Sdelphij case SO_RCVTIMEO: 1205296341Sdelphij optval = (sopt->sopt_name == SO_SNDTIMEO ? 1206296341Sdelphij so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 1207296341Sdelphij 1208296341Sdelphij tv.tv_sec = optval / hz; 1209296341Sdelphij tv.tv_usec = (optval % hz) * tick; 1210296341Sdelphij error = sooptcopyout(sopt, &tv, sizeof tv); 1211296341Sdelphij break; 1212194206Ssimon 1213296341Sdelphij default: 1214296341Sdelphij error = ENOPROTOOPT; 1215194206Ssimon break; 1216296341Sdelphij } 1217194206Ssimon return (error); 1218296341Sdelphij } 1219296341Sdelphij} 1220296341Sdelphij 1221296341Sdelphij/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ 1222194206Ssimonint 1223296341Sdelphijsoopt_getm(struct sockopt *sopt, struct mbuf **mp) 1224296341Sdelphij{ 1225296341Sdelphij struct mbuf *m, *m_prev; 1226296341Sdelphij int sopt_size = sopt->sopt_valsize; 1227194206Ssimon 1228296341Sdelphij MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); 1229296341Sdelphij if (m == 0) 1230296341Sdelphij return ENOBUFS; 1231296341Sdelphij if (sopt_size > MLEN) { 1232296341Sdelphij MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); 1233296341Sdelphij if ((m->m_flags & M_EXT) == 0) { 1234296341Sdelphij m_free(m); 1235296341Sdelphij return ENOBUFS; 1236296341Sdelphij } 1237296341Sdelphij m->m_len = min(MCLBYTES, sopt_size); 1238296341Sdelphij } else { 1239194206Ssimon m->m_len = min(MLEN, sopt_size); 1240296341Sdelphij } 1241296341Sdelphij sopt_size -= m->m_len; 1242296341Sdelphij *mp = m; 1243194206Ssimon m_prev = m; 1244296341Sdelphij 1245296341Sdelphij while (sopt_size) { 1246296341Sdelphij MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); 1247296341Sdelphij if (m == 0) { 1248296341Sdelphij m_freem(*mp); 1249238405Sjkim return ENOBUFS; 1250296341Sdelphij } 1251296341Sdelphij if (sopt_size > MLEN) { 1252238405Sjkim MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); 1253296341Sdelphij if ((m->m_flags & M_EXT) == 0) { 1254296341Sdelphij m_freem(*mp); 1255296341Sdelphij return ENOBUFS; 1256296341Sdelphij } 1257296341Sdelphij m->m_len = min(MCLBYTES, sopt_size); 1258296341Sdelphij } else { 1259296341Sdelphij m->m_len = min(MLEN, sopt_size); 1260296341Sdelphij } 1261296341Sdelphij sopt_size -= m->m_len; 1262296341Sdelphij m_prev->m_next = m; 1263296341Sdelphij m_prev = m; 1264296341Sdelphij } 1265296341Sdelphij return 0; 1266296341Sdelphij} 1267296341Sdelphij 1268296341Sdelphij/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */ 1269296341Sdelphijint 1270296341Sdelphijsoopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 1271296341Sdelphij{ 1272296341Sdelphij struct mbuf *m0 = m; 1273296341Sdelphij 1274296341Sdelphij if (sopt->sopt_val == NULL) 1275296341Sdelphij return 0; 1276296341Sdelphij while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1277296341Sdelphij if (sopt->sopt_p != NULL) { 1278296341Sdelphij int error; 1279238405Sjkim 1280296341Sdelphij error = copyin(sopt->sopt_val, mtod(m, char *), 1281296341Sdelphij m->m_len); 1282296341Sdelphij if (error != 0) { 1283296341Sdelphij m_freem(m0); 1284194206Ssimon return(error); 1285296341Sdelphij } 1286296341Sdelphij } else 1287194206Ssimon bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); 1288194206Ssimon sopt->sopt_valsize -= m->m_len; 1289296341Sdelphij (caddr_t)sopt->sopt_val += m->m_len; 1290296341Sdelphij m = m->m_next; 1291296341Sdelphij } 1292296341Sdelphij if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 1293296341Sdelphij panic("ip6_sooptmcopyin"); 1294296341Sdelphij return 0; 1295296341Sdelphij} 1296296341Sdelphij 1297296341Sdelphij/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 1298296341Sdelphijint 1299296341Sdelphijsoopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 1300296341Sdelphij{ 1301296341Sdelphij struct mbuf *m0 = m; 1302296341Sdelphij size_t valsize = 0; 1303296341Sdelphij 1304296341Sdelphij if (sopt->sopt_val == NULL) 1305296341Sdelphij return 0; 1306296341Sdelphij while (m != NULL && sopt->sopt_valsize >= m->m_len) { 1307296341Sdelphij if (sopt->sopt_p != NULL) { 1308296341Sdelphij int error; 1309296341Sdelphij 1310296341Sdelphij error = copyout(mtod(m, char *), sopt->sopt_val, 1311296341Sdelphij m->m_len); 1312296341Sdelphij if (error != 0) { 1313296341Sdelphij m_freem(m0); 1314296341Sdelphij return(error); 1315296341Sdelphij } 1316296341Sdelphij } else 1317296341Sdelphij bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); 1318296341Sdelphij sopt->sopt_valsize -= m->m_len; 1319296341Sdelphij (caddr_t)sopt->sopt_val += m->m_len; 1320296341Sdelphij valsize += m->m_len; 1321296341Sdelphij m = m->m_next; 1322296341Sdelphij } 1323296341Sdelphij if (m != NULL) { 1324194206Ssimon /* enough soopt buffer should be given from user-land */ 1325194206Ssimon m_freem(m0); 1326 return(EINVAL); 1327 } 1328 sopt->sopt_valsize = valsize; 1329 return 0; 1330} 1331 1332void 1333sohasoutofband(so) 1334 register struct socket *so; 1335{ 1336 if (so->so_sigio != NULL) 1337 pgsigio(so->so_sigio, SIGURG, 0); 1338 selwakeup(&so->so_rcv.sb_sel); 1339} 1340 1341int 1342sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) 1343{ 1344 int revents = 0; 1345 int s = splnet(); 1346 1347 if (events & (POLLIN | POLLRDNORM)) 1348 if (soreadable(so)) 1349 revents |= events & (POLLIN | POLLRDNORM); 1350 1351 if (events & (POLLOUT | POLLWRNORM)) 1352 if (sowriteable(so)) 1353 revents |= events & (POLLOUT | POLLWRNORM); 1354 1355 if (events & (POLLPRI | POLLRDBAND)) 1356 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 1357 revents |= events & (POLLPRI | POLLRDBAND); 1358 1359 if (revents == 0) { 1360 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 1361 selrecord(p, &so->so_rcv.sb_sel); 1362 so->so_rcv.sb_flags |= SB_SEL; 1363 } 1364 1365 if (events & (POLLOUT | POLLWRNORM)) { 1366 selrecord(p, &so->so_snd.sb_sel); 1367 so->so_snd.sb_flags |= SB_SEL; 1368 } 1369 } 1370 1371 splx(s); 1372 return (revents); 1373} 1374