1139804Simp/*- 21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 4. Neither the name of the University nor the names of its contributors 141541Srgrimes * may be used to endorse or promote products derived from this software 151541Srgrimes * without specific prior written permission. 161541Srgrimes * 171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271541Srgrimes * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.0/sys/kern/uipc_sockbuf.c 298069 2016-04-15 16:10:11Z pfg $");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/aio.h> /* for aio_swake proto */
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

/*
 * Function pointer set by the AIO routines so that the socket buffer code
 * can call back into the AIO module if it is loaded.
 */
void	(*aio_swake)(struct socket *, struct sockbuf *);

/*
 * Primitive routines for operating on socket buffers
 */

/* Global cap on a single socket buffer reservation (kern.ipc.maxsockbuf). */
u_long	sb_max = SB_MAX;
/* sb_max scaled down for per-cluster mbuf bookkeeping overhead. */
u_long sb_max_adj =
       (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

/* Forward declarations for helpers defined later in this file. */
static	struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
static	void	sbflush_internal(struct sockbuf *sb);

/*
 * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
 */
static void
sbm_clrprotoflags(struct mbuf *m, int flags)
{
	int mask;

	/*
	 * Strip protocol-layer flags from every mbuf in the chain; when the
	 * caller passed PRUS_NOTREADY, keep M_NOTREADY set so not-yet-ready
	 * mbufs stay marked as such.
	 */
	mask = ~M_PROTOFLAGS;
	if (flags & PRUS_NOTREADY)
		mask |= M_NOTREADY;
	while (m) {
		m->m_flags &= mask;
		m = m->m_next;
	}
}

/*
 * Mark ready "count" mbufs starting with "m".
 *
 * Returns 0 when the newly-ready data became available to the socket (the
 * chain headed by "m" was blocking the front of the buffer), or EINPROGRESS
 * when an earlier not-ready mbuf still blocks it.
 */
int
sbready(struct sockbuf *sb, struct mbuf *m, int count)
{
	u_int blocker;

	SOCKBUF_LOCK_ASSERT(sb);
	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));

	/* If "m" is the first not-ready mbuf, it blocks the whole queue. */
	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;

	for (int i = 0; i < count; i++, m = m->m_next) {
		KASSERT(m->m_flags & M_NOTREADY,
		    ("%s: m %p !M_NOTREADY", __func__, m));
		m->m_flags &= ~(M_NOTREADY | blocker);
		if (blocker)
			sb->sb_acc += m->m_len;	/* data now available */
	}

	if (!blocker)
		return (EINPROGRESS);

	/* This one was blocking all the queue. */
	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
		KASSERT(m->m_flags & M_BLOCKED,
		    ("%s: m %p !M_BLOCKED", __func__, m));
		m->m_flags &= ~M_BLOCKED;
		sb->sb_acc += m->m_len;
	}

	/* Advance sb_fnrdy past everything just made available. */
	sb->sb_fnrdy = m;

	return (0);
}

/*
 * Adjust sockbuf state reflecting allocation of m.
 */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK_ASSERT(sb);

	/* Claimed characters always count toward the total. */
	sb->sb_ccc += m->m_len;

	/*
	 * With no first-not-ready mbuf outstanding, this mbuf either becomes
	 * sb_fnrdy (when itself not ready) or its data is immediately
	 * available.  Otherwise it queues behind an earlier not-ready mbuf
	 * and is marked blocked.
	 */
	if (sb->sb_fnrdy == NULL) {
		if (m->m_flags & M_NOTREADY)
			sb->sb_fnrdy = m;
		else
			sb->sb_acc += m->m_len;
	} else
		m->m_flags |= M_BLOCKED;

	/* Non-data mbufs (addresses, control) are accounted in sb_ctl. */
	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
		sb->sb_ctl += m->m_len;

	sb->sb_mbcnt += MSIZE;
	sb->sb_mcnt += 1;

	if (m->m_flags & M_EXT) {
		sb->sb_mbcnt += m->m_ext.ext_size;
		sb->sb_ccnt += 1;
	}
}

/*
 * Adjust sockbuf state reflecting freeing of m.
 */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{

#if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
	SOCKBUF_LOCK_ASSERT(sb);
#endif

	sb->sb_ccc -= m->m_len;

	/* Only mbufs whose data was available were counted in sb_acc. */
	if (!(m->m_flags & M_NOTAVAIL))
		sb->sb_acc -= m->m_len;

	/*
	 * If we are freeing the first-not-ready mbuf, the ready mbufs that
	 * followed it were merely blocked; unblock them and advance sb_fnrdy
	 * to the next genuinely not-ready mbuf (or NULL).
	 */
	if (m == sb->sb_fnrdy) {
		struct mbuf *n;

		KASSERT(m->m_flags & M_NOTREADY,
		    ("%s: m %p !M_NOTREADY", __func__, m));

		n = m->m_next;
		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
			n->m_flags &= ~M_BLOCKED;
			sb->sb_acc += n->m_len;
			n = n->m_next;
		}
		sb->sb_fnrdy = n;
	}

	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
		sb->sb_ctl -= m->m_len;

	sb->sb_mbcnt -= MSIZE;
	sb->sb_mcnt -= 1;
	if (m->m_flags & M_EXT) {
		sb->sb_mbcnt -= m->m_ext.ext_size;
		sb->sb_ccnt -= 1;
	}

	/* Invalidate the cached send pointer if it referenced this mbuf. */
	if (sb->sb_sndptr == m) {
		sb->sb_sndptr = NULL;
		sb->sb_sndptroff = 0;
	}
	if (sb->sb_sndptroff != 0)
		sb->sb_sndptroff -= m->m_len;
}

/*
 * Socantsendmore indicates that no more data will be sent on the socket; it
 * would normally be applied to a socket when the user informs the system
 * that no more data is to be sent, by the protocol code (in case
 * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
 * received, and will normally be applied to the socket by a protocol when it
 * detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */
void
socantsendmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_snd);

	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	/* Wake up writers; note sowwakeup_locked() drops the sockbuf lock. */
	sowwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantsendmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_snd);
	socantsendmore_locked(so);
	/* socantsendmore_locked() returns with the lock already released. */
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantrcvmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	/* Wake up readers; note sorwakeup_locked() drops the sockbuf lock. */
	sorwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

void
socantrcvmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_rcv);
	socantrcvmore_locked(so);
	/* socantrcvmore_locked() returns with the lock already released. */
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_flags |= SB_WAIT;
	/*
	 * Sleep on sb_acc; SB_NOINTR suppresses PCATCH so the sleep cannot
	 * be interrupted by a signal.
	 */
	return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    sb->sb_timeo, 0, 0));
}

/*
 * Acquire the sockbuf I/O serialization (sx) lock.  With SBL_WAIT, block
 * until available -- interruptibly, unless SB_NOINTR or SBL_NOINTR is set;
 * without SBL_WAIT, fail immediately with EWOULDBLOCK if the lock is held.
 */
int
sblock(struct sockbuf *sb, int flags)
{

	KASSERT((flags & SBL_VALID) == flags,
	    ("sblock: flags invalid (0x%x)", flags));

	if (flags & SBL_WAIT) {
		if ((sb->sb_flags & SB_NOINTR) ||
		    (flags & SBL_NOINTR)) {
			sx_xlock(&sb->sb_sx);
			return (0);
		}
		return (sx_xlock_sig(&sb->sb_sx));
	} else {
		if (sx_try_xlock(&sb->sb_sx) == 0)
			return (EWOULDBLOCK);
		return (0);
	}
}

/* Release the sockbuf I/O serialization lock taken by sblock(). */
void
sbunlock(struct sockbuf *sb)
{

	sx_xunlock(&sb->sb_sx);
}

/*
 * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
 * via SIGIO if the socket has the SS_ASYNC flag set.
 *
 * Called with the socket buffer lock held; will release the lock by the end
 * of the function.  This allows the caller to acquire the socket buffer lock
 * while testing for the need for various sorts of wakeup and hold it through
 * to the point where it's no longer required.  We currently hold the lock
 * through calls out to other subsystems (with the exception of kqueue), and
 * then release it to avoid lock order issues.  It's not clear that's
 * correct.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
	int ret;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Wake select()/poll() waiters; clear SB_SEL once nobody is left. */
	selwakeuppri(&sb->sb_sel, PSOCK);
	if (!SEL_WAITING(&sb->sb_sel))
		sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_acc);
	}
	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
	if (sb->sb_upcall != NULL) {
		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
		if (ret == SU_ISCONNECTED) {
			/* Only a receive-buffer upcall may report this. */
			KASSERT(sb == &so->so_rcv,
			    ("SO_SND upcall returned SU_ISCONNECTED"));
			soupcall_clear(so, SO_RCV);
		}
	} else
		ret = SU_OK;
	if (sb->sb_flags & SB_AIO)
		sowakeup_aio(so, sb);
	SOCKBUF_UNLOCK(sb);
	/* soisconnected() must run without the sockbuf lock held. */
	if (ret == SU_ISCONNECTED)
		soisconnected(so);
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and one for
 * receiving data.  Each buffer contains a queue of mbufs, information about
 * the number of mbufs and amount of data in the queue, and other fields
 * allowing select() statements and notification on data availability to be
 * implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.  Each
 * record is a list of mbufs chained together with the m_next field.  Records
 * are chained together with the m_nextpkt field.  The upper level routine
 * soreceive() expects the following conventions to be observed when placing
 * information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's name,
 *    then a record containing that name must be present before any
 *    associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really just
 *    additional data associated with the message), and there are ``rights''
 *    to be received, then a record containing this data should be present
 *    (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by a data
 *    record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space should
 * be released by calling sbrelease() when the socket is destroyed.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	struct thread *td = curthread;

	/* Lock order: send buffer before receive buffer. */
	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
		goto bad;
	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
		goto bad2;
	/* Establish sane low-water marks if none were set. */
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
bad2:
	/* Undo the send-side reservation made above. */
	sbrelease_locked(&so->so_snd, so);
bad:
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (ENOBUFS);
}

/*
 * Sysctl handler: validate and install a new global socket buffer limit
 * (sb_max) and recompute the mbuf-overhead-adjusted limit sb_max_adj.
 */
static int
sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	u_long tmp_sb_max = sb_max;

	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
	if (error || !req->newptr)
		return (error);
	/* The limit must cover at least one mbuf plus one cluster. */
	if (tmp_sb_max < MSIZE + MCLBYTES)
		return (EINVAL);
	sb_max = tmp_sb_max;
	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
	return (0);
}

/*
 * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
 * become limiting if buffering efficiency is near the normal case.
 */
int
sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	rlim_t sbsize_limit;

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * When a thread is passed, we take into account the thread's socket
	 * buffer size limit.  The caller will generally pass curthread, but
	 * in the TCP input path, NULL will be passed to indicate that no
	 * appropriate thread resource limits are available.  In that case,
	 * we don't apply a process limit.
	 */
	if (cc > sb_max_adj)
		return (0);
	if (td != NULL) {
		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
	} else
		sbsize_limit = RLIM_INFINITY;
	/* Charge the owning uid; fails if the resource limit is exceeded. */
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (0);
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/* Locked wrapper around sbreserve_locked(). */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	int error;

	SOCKBUF_LOCK(sb);
	error = sbreserve_locked(sb, cc, so, td);
	SOCKBUF_UNLOCK(sb);
	return (error);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease_internal(struct sockbuf *sb, struct socket *so)
{

	/* Drop all queued mbufs, then return the reserved space. */
	sbflush_internal(sb);
	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbrelease_internal(sb, so);
}

void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Teardown variant used when the socket itself is being destroyed and no
 * other references can reach the buffer, hence no lock is taken.
 */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
}

/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added.  sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendrights() should be used.
 * In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data.  Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  Data is normally copied from a socket send buffer in a
 * protocol with m_copy for output to a peer, and then removing the data from
 * the socket buffer with sbdrop() or sbdroprecord() when the data is
 * acknowledged by the peer.
 */
#ifdef SOCKBUF_DEBUG
/*
 * Verify that sb_lastrecord points at the final record on the m_nextpkt
 * chain; on mismatch, dump the record chain and panic.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
			__func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("%s from %s:%u", __func__, file, line);
	}
}

/*
 * Verify that sb_mbtail points at the last mbuf of the last record; on
 * mismatch, dump the whole buffer and panic.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Walk to the last record, then to that record's last mbuf. */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
			__func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("%s from %s:%u", __func__, file, line);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 onto the end of the record chain, updating sb_mb and
 * sb_lastrecord as required.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the socket buffer sb.  The
 * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
 * are discarded and mbufs are compacted where possible.
 */
void
sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
{
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m == NULL)
		return;
	sbm_clrprotoflags(m, flags);
	SBLASTRECORDCHK(sb);
	n = sb->sb_mb;
	if (n) {
		/* Scan the last record; M_EOR means we must start a new one. */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		if ((n = sb->sb_lastrecord) != NULL) {
			do {
				if (n->m_flags & M_EOR) {
					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
					return;
				}
			} while (n->m_next && (n = n->m_next));
		} else {
			/*
			 * If this is the first record in the socket buffer,
			 * it's also the last record.
			 */
			sb->sb_lastrecord = m;
		}
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb);
}

/*
 * Append mbuf chain m to the last record in the socket buffer sb.  The
 * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
 * are discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}

/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
{
	SOCKBUF_LOCK_ASSERT(sb);

	/* Stream sockets carry a single record at most. */
	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	/* Remove all packet headers and mbuf tags to get a pure data chain. */
	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}

/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}

#ifdef SOCKBUF_DEBUG
/*
 * Walk the entire buffer and verify its accounting invariants: mbuf lengths
 * must sum to sb_acc/sb_ccc, storage must sum to sb_mbcnt, and the
 * M_NOTREADY/M_NOTAVAIL flags must be consistent with sb_fnrdy.  Panics on
 * any mismatch.
 */
void
sbcheck(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m, *n, *fnrdy;
	u_long acc, ccc, mbcnt;

	SOCKBUF_LOCK_ASSERT(sb);

	acc = ccc = mbcnt = 0;
	fnrdy = NULL;

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			if (m->m_len == 0) {
				/* Empty mbufs should have been compressed away. */
				printf("sb %p empty mbuf %p\n", sb, m);
				goto fail;
			}
			if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
				if (m != sb->sb_fnrdy) {
					printf("sb %p: fnrdy %p != m %p\n",
					    sb, sb->sb_fnrdy, m);
					goto fail;
				}
				fnrdy = m;
			}
			if (fnrdy) {
				/* Everything after sb_fnrdy must be unavailable. */
				if (!(m->m_flags & M_NOTAVAIL)) {
					printf("sb %p: fnrdy %p, m %p is avail\n",
					    sb, sb->sb_fnrdy, m);
					goto fail;
				}
			} else
				acc += m->m_len;
			ccc += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
		goto fail;
	}
	return;
fail:
	panic("%s from %s:%u", __func__, file, line);
}
#endif

/*
 * As above, except the mbuf chain begins a new record.
 */
void
sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == NULL)
		return;
	m_clrprotoflags(m0);
	/*
	 * Put the first mbuf on the queue.  Note this permits zero length
	 * records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	sb->sb_mbtail = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark to the tail of the chain. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
	sbcompress(sb, m, m0);
}

/*
 * As above, except the mbuf chain begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}

/* Helper routine that appends data, control, and address to a sockbuf.
*/ 783262867Sasomersstatic int 784262867Sasomerssbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, 785262867Sasomers struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) 7861541Srgrimes{ 787121628Ssam struct mbuf *m, *n, *nlast; 788118045Sscottl#if MSIZE <= 256 7891541Srgrimes if (asa->sa_len > MLEN) 7901541Srgrimes return (0); 791118045Sscottl#endif 792248318Sglebius m = m_get(M_NOWAIT, MT_SONAME); 793248318Sglebius if (m == NULL) 7941541Srgrimes return (0); 7951541Srgrimes m->m_len = asa->sa_len; 79698998Salfred bcopy(asa, mtod(m, caddr_t), asa->sa_len); 797276058Sglebius if (m0) 798276058Sglebius m_clrprotoflags(m0); 799262867Sasomers if (ctrl_last) 800262867Sasomers ctrl_last->m_next = m0; /* concatenate data to control */ 8011541Srgrimes else 8021541Srgrimes control = m0; 8031541Srgrimes m->m_next = control; 804121628Ssam for (n = m; n->m_next != NULL; n = n->m_next) 8051541Srgrimes sballoc(sb, n); 806121628Ssam sballoc(sb, n); 807121628Ssam nlast = n; 808121628Ssam SBLINKRECORD(sb, m); 809121628Ssam 810121628Ssam sb->sb_mbtail = nlast; 811121628Ssam SBLASTMBUFCHK(sb); 812121628Ssam 813121628Ssam SBLASTRECORDCHK(sb); 8141541Srgrimes return (1); 8151541Srgrimes} 8161541Srgrimes 817130831Srwatson/* 818160915Srwatson * Append address and data, and optionally, control (ancillary) data to the 819160915Srwatson * receive queue of a socket. If present, m0 must include a packet header 820160915Srwatson * with total length. Returns 0 if no space in sockbuf or insufficient 821160915Srwatson * mbufs. 
822130831Srwatson */ 8231549Srgrimesint 824262867Sasomerssbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, 825262867Sasomers struct mbuf *m0, struct mbuf *control) 826262867Sasomers{ 827262867Sasomers struct mbuf *ctrl_last; 828262867Sasomers int space = asa->sa_len; 829262867Sasomers 830262867Sasomers SOCKBUF_LOCK_ASSERT(sb); 831262867Sasomers 832262867Sasomers if (m0 && (m0->m_flags & M_PKTHDR) == 0) 833262867Sasomers panic("sbappendaddr_locked"); 834262867Sasomers if (m0) 835262867Sasomers space += m0->m_pkthdr.len; 836262867Sasomers space += m_length(control, &ctrl_last); 837262867Sasomers 838262867Sasomers if (space > sbspace(sb)) 839262867Sasomers return (0); 840262867Sasomers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 841262867Sasomers} 842262867Sasomers 843262867Sasomers/* 844262867Sasomers * Append address and data, and optionally, control (ancillary) data to the 845262867Sasomers * receive queue of a socket. If present, m0 must include a packet header 846262867Sasomers * with total length. Returns 0 if insufficient mbufs. Does not validate space 847262867Sasomers * on the receiving sockbuf. 848262867Sasomers */ 849262867Sasomersint 850262867Sasomerssbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, 851262867Sasomers struct mbuf *m0, struct mbuf *control) 852262867Sasomers{ 853262867Sasomers struct mbuf *ctrl_last; 854262867Sasomers 855262867Sasomers SOCKBUF_LOCK_ASSERT(sb); 856262867Sasomers 857262867Sasomers ctrl_last = (control == NULL) ? NULL : m_last(control); 858262867Sasomers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 859262867Sasomers} 860262867Sasomers 861262867Sasomers/* 862262867Sasomers * Append address and data, and optionally, control (ancillary) data to the 863262867Sasomers * receive queue of a socket. If present, m0 must include a packet header 864262867Sasomers * with total length. 
Returns 0 if no space in sockbuf or insufficient 865262867Sasomers * mbufs. 866262867Sasomers */ 867262867Sasomersint 868160915Srwatsonsbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, 869160915Srwatson struct mbuf *m0, struct mbuf *control) 870130831Srwatson{ 871130831Srwatson int retval; 872130831Srwatson 873130831Srwatson SOCKBUF_LOCK(sb); 874130831Srwatson retval = sbappendaddr_locked(sb, asa, m0, control); 875130831Srwatson SOCKBUF_UNLOCK(sb); 876130831Srwatson return (retval); 877130831Srwatson} 878130831Srwatson 879130831Srwatsonint 880160915Srwatsonsbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, 881160915Srwatson struct mbuf *control) 8821541Srgrimes{ 883121628Ssam struct mbuf *m, *n, *mlast; 884103554Sphk int space; 8851541Srgrimes 886130831Srwatson SOCKBUF_LOCK_ASSERT(sb); 887130831Srwatson 888298069Spfg if (control == NULL) 889130831Srwatson panic("sbappendcontrol_locked"); 890103554Sphk space = m_length(control, &n) + m_length(m0, NULL); 891130831Srwatson 8921541Srgrimes if (space > sbspace(sb)) 8931541Srgrimes return (0); 894276058Sglebius m_clrprotoflags(m0); 8951541Srgrimes n->m_next = m0; /* concatenate data to control */ 896121628Ssam 897121628Ssam SBLASTRECORDCHK(sb); 898121628Ssam 899121628Ssam for (m = control; m->m_next; m = m->m_next) 9001541Srgrimes sballoc(sb, m); 901121628Ssam sballoc(sb, m); 902121628Ssam mlast = m; 903121628Ssam SBLINKRECORD(sb, control); 904121628Ssam 905121628Ssam sb->sb_mbtail = mlast; 906121628Ssam SBLASTMBUFCHK(sb); 907121628Ssam 908121628Ssam SBLASTRECORDCHK(sb); 9091541Srgrimes return (1); 9101541Srgrimes} 9111541Srgrimes 912130831Srwatsonint 913160915Srwatsonsbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) 914130831Srwatson{ 915130831Srwatson int retval; 916130831Srwatson 917130831Srwatson SOCKBUF_LOCK(sb); 918130831Srwatson retval = sbappendcontrol_locked(sb, m0, control); 919130831Srwatson SOCKBUF_UNLOCK(sb); 920130831Srwatson return (retval); 921130831Srwatson} 
922130831Srwatson 9231541Srgrimes/* 924150280Srwatson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf 925150280Srwatson * (n). If (n) is NULL, the buffer is presumed empty. 926150280Srwatson * 927150280Srwatson * When the data is compressed, mbufs in the chain may be handled in one of 928150280Srwatson * three ways: 929150280Srwatson * 930150280Srwatson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no 931150280Srwatson * record boundary, and no change in data type). 932150280Srwatson * 933150280Srwatson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into 934150280Srwatson * an mbuf already in the socket buffer. This can occur if an 935275326Sglebius * appropriate mbuf exists, there is room, both mbufs are not marked as 936275326Sglebius * not ready, and no merging of data types will occur. 937150280Srwatson * 938150280Srwatson * (3) The mbuf may be appended to the end of the existing mbuf chain. 939150280Srwatson * 940150280Srwatson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as 941150280Srwatson * end-of-record. 
9421541Srgrimes */ 9431549Srgrimesvoid 944160915Srwatsonsbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 9451541Srgrimes{ 946160915Srwatson int eor = 0; 947160915Srwatson struct mbuf *o; 9481541Srgrimes 949130831Srwatson SOCKBUF_LOCK_ASSERT(sb); 950130831Srwatson 9511541Srgrimes while (m) { 9521541Srgrimes eor |= m->m_flags & M_EOR; 9531541Srgrimes if (m->m_len == 0 && 9541541Srgrimes (eor == 0 || 9551541Srgrimes (((o = m->m_next) || (o = n)) && 9561541Srgrimes o->m_type == m->m_type))) { 957121628Ssam if (sb->sb_lastrecord == m) 958121628Ssam sb->sb_lastrecord = m->m_next; 9591541Srgrimes m = m_free(m); 9601541Srgrimes continue; 9611541Srgrimes } 96268918Sdwmalone if (n && (n->m_flags & M_EOR) == 0 && 96368918Sdwmalone M_WRITABLE(n) && 964174711Skmacy ((sb->sb_flags & SB_NOCOALESCE) == 0) && 965275326Sglebius !(m->m_flags & M_NOTREADY) && 966275326Sglebius !(n->m_flags & M_NOTREADY) && 96768918Sdwmalone m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 96868918Sdwmalone m->m_len <= M_TRAILINGSPACE(n) && 9691541Srgrimes n->m_type == m->m_type) { 9701541Srgrimes bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 9711541Srgrimes (unsigned)m->m_len); 9721541Srgrimes n->m_len += m->m_len; 973275326Sglebius sb->sb_ccc += m->m_len; 974275326Sglebius if (sb->sb_fnrdy == NULL) 975275326Sglebius sb->sb_acc += m->m_len; 976151967Sandre if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 977109098Stjr /* XXX: Probably don't need.*/ 978106473Skbyanc sb->sb_ctl += m->m_len; 9791541Srgrimes m = m_free(m); 9801541Srgrimes continue; 9811541Srgrimes } 9821541Srgrimes if (n) 9831541Srgrimes n->m_next = m; 9841541Srgrimes else 9851541Srgrimes sb->sb_mb = m; 986121628Ssam sb->sb_mbtail = m; 9871541Srgrimes sballoc(sb, m); 9881541Srgrimes n = m; 9891541Srgrimes m->m_flags &= ~M_EOR; 9901541Srgrimes m = m->m_next; 9911541Srgrimes n->m_next = 0; 9921541Srgrimes } 9931541Srgrimes if (eor) { 994150280Srwatson KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); 
995150280Srwatson n->m_flags |= eor; 9961541Srgrimes } 997121628Ssam SBLASTMBUFCHK(sb); 9981541Srgrimes} 9991541Srgrimes 10001541Srgrimes/* 1001160915Srwatson * Free all mbufs in a sockbuf. Check that all resources are reclaimed. 10021541Srgrimes */ 1003160875Srwatsonstatic void 1004160915Srwatsonsbflush_internal(struct sockbuf *sb) 10051541Srgrimes{ 10061541Srgrimes 100751757Spb while (sb->sb_mbcnt) { 100851757Spb /* 1009260819Sglebius * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: 101051757Spb * we would loop forever. Panic instead. 101151757Spb */ 1012275326Sglebius if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) 101351757Spb break; 1014275326Sglebius m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); 101551757Spb } 1016275326Sglebius KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, 1017275326Sglebius ("%s: ccc %u mb %p mbcnt %u", __func__, 1018275326Sglebius sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); 10191541Srgrimes} 10201541Srgrimes 1021130831Srwatsonvoid 1022160915Srwatsonsbflush_locked(struct sockbuf *sb) 1023160875Srwatson{ 1024160875Srwatson 1025160875Srwatson SOCKBUF_LOCK_ASSERT(sb); 1026160875Srwatson sbflush_internal(sb); 1027160875Srwatson} 1028160875Srwatson 1029160875Srwatsonvoid 1030160915Srwatsonsbflush(struct sockbuf *sb) 1031130831Srwatson{ 1032130831Srwatson 1033130831Srwatson SOCKBUF_LOCK(sb); 1034130831Srwatson sbflush_locked(sb); 1035130831Srwatson SOCKBUF_UNLOCK(sb); 1036130831Srwatson} 1037130831Srwatson 10381541Srgrimes/* 1039256185Sglebius * Cut data from (the front of) a sockbuf. 10401541Srgrimes */ 1041256185Sglebiusstatic struct mbuf * 1042256185Sglebiussbcut_internal(struct sockbuf *sb, int len) 10431541Srgrimes{ 1044275326Sglebius struct mbuf *m, *next, *mfree; 10451541Srgrimes 10461541Srgrimes next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; 1047256185Sglebius mfree = NULL; 1048256185Sglebius 10491541Srgrimes while (len > 0) { 1050274509Sglebius if (m == NULL) { 1051274509Sglebius KASSERT(next, ("%s: no next, len %d", __func__, len)); 10521541Srgrimes m = next; 10531541Srgrimes next = m->m_nextpkt; 10541541Srgrimes } 10551541Srgrimes if (m->m_len > len) { 1056275326Sglebius KASSERT(!(m->m_flags & M_NOTAVAIL), 1057275326Sglebius ("%s: m %p M_NOTAVAIL", __func__, m)); 10581541Srgrimes m->m_len -= len; 10591541Srgrimes m->m_data += len; 1060275326Sglebius sb->sb_ccc -= len; 1061275326Sglebius sb->sb_acc -= len; 1062167715Sandre if (sb->sb_sndptroff != 0) 1063167715Sandre sb->sb_sndptroff -= len; 1064151967Sandre if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 1065106473Skbyanc sb->sb_ctl -= len; 10661541Srgrimes break; 10671541Srgrimes } 10681541Srgrimes len -= m->m_len; 10691541Srgrimes sbfree(sb, m); 1070275326Sglebius /* 1071275326Sglebius * Do not put M_NOTREADY buffers to the free list, they 1072275326Sglebius * are referenced from outside. 1073275326Sglebius */ 1074275326Sglebius if (m->m_flags & M_NOTREADY) 1075275326Sglebius m = m->m_next; 1076275326Sglebius else { 1077275326Sglebius struct mbuf *n; 1078275326Sglebius 1079275326Sglebius n = m->m_next; 1080275326Sglebius m->m_next = mfree; 1081275326Sglebius mfree = m; 1082275326Sglebius m = n; 1083275326Sglebius } 10841541Srgrimes } 1085275968Sglebius /* 1086275968Sglebius * Free any zero-length mbufs from the buffer. 1087275968Sglebius * For SOCK_DGRAM sockets such mbufs represent empty records. 1088275968Sglebius * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, 1089275968Sglebius * when sosend_generic() needs to send only control data. 
1090275968Sglebius */ 1091275968Sglebius while (m && m->m_len == 0) { 1092275968Sglebius struct mbuf *n; 1093275968Sglebius 1094275968Sglebius sbfree(sb, m); 1095275968Sglebius n = m->m_next; 1096275968Sglebius m->m_next = mfree; 1097275968Sglebius mfree = m; 1098275968Sglebius m = n; 1099275968Sglebius } 11001541Srgrimes if (m) { 11011541Srgrimes sb->sb_mb = m; 11021541Srgrimes m->m_nextpkt = next; 11031541Srgrimes } else 11041541Srgrimes sb->sb_mb = next; 1105121628Ssam /* 1106160915Srwatson * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure 1107160915Srwatson * sb_lastrecord is up-to-date if we dropped part of the last record. 1108121628Ssam */ 1109121628Ssam m = sb->sb_mb; 1110121628Ssam if (m == NULL) { 1111121628Ssam sb->sb_mbtail = NULL; 1112121628Ssam sb->sb_lastrecord = NULL; 1113121628Ssam } else if (m->m_nextpkt == NULL) { 1114121628Ssam sb->sb_lastrecord = m; 1115121628Ssam } 1116256185Sglebius 1117256185Sglebius return (mfree); 11181541Srgrimes} 11191541Srgrimes 11201541Srgrimes/* 1121130831Srwatson * Drop data from (the front of) a sockbuf. 1122130831Srwatson */ 1123130831Srwatsonvoid 1124160915Srwatsonsbdrop_locked(struct sockbuf *sb, int len) 1125160875Srwatson{ 1126160875Srwatson 1127160875Srwatson SOCKBUF_LOCK_ASSERT(sb); 1128256185Sglebius m_freem(sbcut_internal(sb, len)); 1129256185Sglebius} 1130160875Srwatson 1131256185Sglebius/* 1132256185Sglebius * Drop data from (the front of) a sockbuf, 1133256185Sglebius * and return it to caller. 
1134256185Sglebius */ 1135256185Sglebiusstruct mbuf * 1136256185Sglebiussbcut_locked(struct sockbuf *sb, int len) 1137256185Sglebius{ 1138256185Sglebius 1139256185Sglebius SOCKBUF_LOCK_ASSERT(sb); 1140256185Sglebius return (sbcut_internal(sb, len)); 1141160875Srwatson} 1142160875Srwatson 1143160875Srwatsonvoid 1144160915Srwatsonsbdrop(struct sockbuf *sb, int len) 1145130831Srwatson{ 1146256185Sglebius struct mbuf *mfree; 1147130831Srwatson 1148130831Srwatson SOCKBUF_LOCK(sb); 1149256185Sglebius mfree = sbcut_internal(sb, len); 1150130831Srwatson SOCKBUF_UNLOCK(sb); 1151256185Sglebius 1152256185Sglebius m_freem(mfree); 1153130831Srwatson} 1154130831Srwatson 1155130831Srwatson/* 1156167715Sandre * Maintain a pointer and offset pair into the socket buffer mbuf chain to 1157167715Sandre * avoid traversal of the entire socket buffer for larger offsets. 1158167715Sandre */ 1159167715Sandrestruct mbuf * 1160167715Sandresbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff) 1161167715Sandre{ 1162167715Sandre struct mbuf *m, *ret; 1163167715Sandre 1164167715Sandre KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 1165275326Sglebius KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__)); 1166275326Sglebius KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__)); 1167167715Sandre 1168167715Sandre /* 1169167715Sandre * Is off below stored offset? Happens on retransmits. 1170167715Sandre * Just return, we can't help here. 1171167715Sandre */ 1172167715Sandre if (sb->sb_sndptroff > off) { 1173167715Sandre *moff = off; 1174167715Sandre return (sb->sb_mb); 1175167715Sandre } 1176167715Sandre 1177167715Sandre /* Return closest mbuf in chain for current offset. */ 1178167715Sandre *moff = off - sb->sb_sndptroff; 1179167715Sandre m = ret = sb->sb_sndptr ? 
sb->sb_sndptr : sb->sb_mb; 1180251984Slstewart if (*moff == m->m_len) { 1181251984Slstewart *moff = 0; 1182251984Slstewart sb->sb_sndptroff += m->m_len; 1183251984Slstewart m = ret = m->m_next; 1184251984Slstewart KASSERT(ret->m_len > 0, 1185251984Slstewart ("mbuf %p in sockbuf %p chain has no valid data", ret, sb)); 1186251984Slstewart } 1187167715Sandre 1188167715Sandre /* Advance by len to be as close as possible for the next transmit. */ 1189167715Sandre for (off = off - sb->sb_sndptroff + len - 1; 1190182842Sbz off > 0 && m != NULL && off >= m->m_len; 1191167715Sandre m = m->m_next) { 1192167715Sandre sb->sb_sndptroff += m->m_len; 1193167715Sandre off -= m->m_len; 1194167715Sandre } 1195182842Sbz if (off > 0 && m == NULL) 1196182842Sbz panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret); 1197167715Sandre sb->sb_sndptr = m; 1198167715Sandre 1199167715Sandre return (ret); 1200167715Sandre} 1201167715Sandre 1202167715Sandre/* 1203271946Shselasky * Return the first mbuf and the mbuf data offset for the provided 1204271946Shselasky * send offset without changing the "sb_sndptroff" field. 
1205271946Shselasky */ 1206271946Shselaskystruct mbuf * 1207271946Shselaskysbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) 1208271946Shselasky{ 1209271946Shselasky struct mbuf *m; 1210271946Shselasky 1211271946Shselasky KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 1212271946Shselasky 1213271946Shselasky /* 1214271946Shselasky * If the "off" is below the stored offset, which happens on 1215271946Shselasky * retransmits, just use "sb_mb": 1216271946Shselasky */ 1217271946Shselasky if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 1218271946Shselasky m = sb->sb_mb; 1219271946Shselasky } else { 1220271946Shselasky m = sb->sb_sndptr; 1221271946Shselasky off -= sb->sb_sndptroff; 1222271946Shselasky } 1223271946Shselasky while (off > 0 && m != NULL) { 1224271946Shselasky if (off < m->m_len) 1225271946Shselasky break; 1226271946Shselasky off -= m->m_len; 1227271946Shselasky m = m->m_next; 1228271946Shselasky } 1229271946Shselasky *moff = off; 1230271946Shselasky return (m); 1231271946Shselasky} 1232271946Shselasky 1233271946Shselasky/* 1234160915Srwatson * Drop a record off the front of a sockbuf and move the next record to the 1235160915Srwatson * front. 12361541Srgrimes */ 12371549Srgrimesvoid 1238160915Srwatsonsbdroprecord_locked(struct sockbuf *sb) 12391541Srgrimes{ 1240160915Srwatson struct mbuf *m; 12411541Srgrimes 1242130831Srwatson SOCKBUF_LOCK_ASSERT(sb); 1243130831Srwatson 12441541Srgrimes m = sb->sb_mb; 12451541Srgrimes if (m) { 12461541Srgrimes sb->sb_mb = m->m_nextpkt; 12471541Srgrimes do { 12481541Srgrimes sbfree(sb, m); 124990227Sdillon m = m_free(m); 12503308Sphk } while (m); 12511541Srgrimes } 1252121628Ssam SB_EMPTY_FIXUP(sb); 12531541Srgrimes} 125417047Swollman 125519622Sfenner/* 1256160915Srwatson * Drop a record off the front of a sockbuf and move the next record to the 1257160915Srwatson * front. 
1258130831Srwatson */ 1259130831Srwatsonvoid 1260160915Srwatsonsbdroprecord(struct sockbuf *sb) 1261130831Srwatson{ 1262130831Srwatson 1263130831Srwatson SOCKBUF_LOCK(sb); 1264130831Srwatson sbdroprecord_locked(sb); 1265130831Srwatson SOCKBUF_UNLOCK(sb); 1266130831Srwatson} 1267130831Srwatson 1268167895Srwatson/* 1269167902Srwatson * Create a "control" mbuf containing the specified data with the specified 1270167902Srwatson * type for presentation on a socket buffer. 1271167895Srwatson */ 1272167895Srwatsonstruct mbuf * 1273169624Srwatsonsbcreatecontrol(caddr_t p, int size, int type, int level) 1274167895Srwatson{ 1275169624Srwatson struct cmsghdr *cp; 1276167895Srwatson struct mbuf *m; 1277167895Srwatson 1278167895Srwatson if (CMSG_SPACE((u_int)size) > MCLBYTES) 1279167895Srwatson return ((struct mbuf *) NULL); 1280167895Srwatson if (CMSG_SPACE((u_int)size) > MLEN) 1281243882Sglebius m = m_getcl(M_NOWAIT, MT_CONTROL, 0); 1282167895Srwatson else 1283243882Sglebius m = m_get(M_NOWAIT, MT_CONTROL); 1284167895Srwatson if (m == NULL) 1285167895Srwatson return ((struct mbuf *) NULL); 1286167895Srwatson cp = mtod(m, struct cmsghdr *); 1287167895Srwatson m->m_len = 0; 1288167895Srwatson KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), 1289167895Srwatson ("sbcreatecontrol: short mbuf")); 1290268430Sdelphij /* 1291268430Sdelphij * Don't leave the padding between the msg header and the 1292268430Sdelphij * cmsg data and the padding after the cmsg data un-initialized. 
1293268430Sdelphij */ 1294268430Sdelphij bzero(cp, CMSG_SPACE((u_int)size)); 1295167895Srwatson if (p != NULL) 1296167895Srwatson (void)memcpy(CMSG_DATA(cp), p, size); 1297167895Srwatson m->m_len = CMSG_SPACE(size); 1298167895Srwatson cp->cmsg_len = CMSG_LEN(size); 1299167895Srwatson cp->cmsg_level = level; 1300167895Srwatson cp->cmsg_type = type; 1301167895Srwatson return (m); 1302167895Srwatson} 1303167895Srwatson 1304167895Srwatson/* 1305167902Srwatson * This does the same for socket buffers that sotoxsocket does for sockets: 1306167902Srwatson * generate an user-format data structure describing the socket buffer. Note 1307167902Srwatson * that the xsockbuf structure, since it is always embedded in a socket, does 1308167902Srwatson * not include a self pointer nor a length. We make this entry point public 1309167902Srwatson * in case some other mechanism needs it. 1310167895Srwatson */ 1311167895Srwatsonvoid 1312167895Srwatsonsbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) 1313167895Srwatson{ 1314169624Srwatson 1315275326Sglebius xsb->sb_cc = sb->sb_ccc; 1316167895Srwatson xsb->sb_hiwat = sb->sb_hiwat; 1317167895Srwatson xsb->sb_mbcnt = sb->sb_mbcnt; 1318179027Sgnn xsb->sb_mcnt = sb->sb_mcnt; 1319179027Sgnn xsb->sb_ccnt = sb->sb_ccnt; 1320167895Srwatson xsb->sb_mbmax = sb->sb_mbmax; 1321167895Srwatson xsb->sb_lowat = sb->sb_lowat; 1322167895Srwatson xsb->sb_flags = sb->sb_flags; 1323167895Srwatson xsb->sb_timeo = sb->sb_timeo; 1324167895Srwatson} 1325167895Srwatson 132623081Swollman/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. 
*/ 132723081Swollmanstatic int dummy; 132823081SwollmanSYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); 1329160621SrwatsonSYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 1330110268Sharti &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); 1331110268ShartiSYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 1332228449Seadler &sb_efficiency, 0, "Socket buffer size waste factor"); 1333