1139804Simp/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1990, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
291541Srgrimes *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
301541Srgrimes */
311541Srgrimes
32116182Sobrien#include <sys/cdefs.h>
33116182Sobrien__FBSDID("$FreeBSD$");
34116182Sobrien
3577598Sjesper#include "opt_param.h"
36101013Srwatson
371541Srgrimes#include <sys/param.h>
3895759Stanimura#include <sys/aio.h> /* for aio_swake proto */
3912041Swollman#include <sys/kernel.h>
4076166Smarkm#include <sys/lock.h>
411541Srgrimes#include <sys/mbuf.h>
4295759Stanimura#include <sys/mutex.h>
4376166Smarkm#include <sys/proc.h>
441541Srgrimes#include <sys/protosw.h>
4551381Sgreen#include <sys/resourcevar.h>
4695759Stanimura#include <sys/signalvar.h>
471541Srgrimes#include <sys/socket.h>
481541Srgrimes#include <sys/socketvar.h>
49169236Srwatson#include <sys/sx.h>
5012041Swollman#include <sys/sysctl.h>
511541Srgrimes
/*
 * Function pointer set by the AIO routines so that the socket buffer code
 * can call back into the AIO module if it is loaded.
 */
void	(*aio_swake)(struct socket *, struct sockbuf *);

/*
 * Primitive routines for operating on socket buffers
 */

/* System-wide cap on a socket buffer's size; settable via sysctl. */
u_long	sb_max = SB_MAX;
/*
 * sb_max scaled down by the per-cluster mbuf overhead so that mbuf
 * accounting (MSIZE + MCLBYTES per cluster) cannot exceed sb_max.
 * Kept in sync with sb_max by sysctl_handle_sb_max().
 */
u_long sb_max_adj =
       (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

/* Internal helpers that operate on an already-locked socket buffer. */
static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
static void	sbflush_internal(struct sockbuf *sb);
70160875Srwatson
711541Srgrimes/*
72160915Srwatson * Socantsendmore indicates that no more data will be sent on the socket; it
73160915Srwatson * would normally be applied to a socket when the user informs the system
74160915Srwatson * that no more data is to be sent, by the protocol code (in case
75160915Srwatson * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
76160915Srwatson * received, and will normally be applied to the socket by a protocol when it
77160915Srwatson * detects that the peer will send no more data.  Data queued for reading in
78160915Srwatson * the socket may yet be read.
791541Srgrimes */
/*
 * Mark the send side of the socket as shut down and wake up any writers.
 * Caller must hold the send buffer lock; sowwakeup_locked() releases it,
 * hence the MA_NOTOWNED assertion on return.
 */
void
socantsendmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_snd);

	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	sowwakeup_locked(so);	/* drops the send buffer lock */
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
90130831Srwatson
/*
 * Unlocked wrapper for socantsendmore_locked(); acquires the send buffer
 * lock, which the locked variant releases before returning.
 */
void
socantsendmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_snd);
	socantsendmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
991541Srgrimes
/*
 * Mark the receive side of the socket as shut down and wake up any readers.
 * Caller must hold the receive buffer lock; sorwakeup_locked() releases it,
 * hence the MA_NOTOWNED assertion on return.  Data already queued may still
 * be read.
 */
void
socantrcvmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);	/* drops the receive buffer lock */
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
1101541Srgrimes
/*
 * Unlocked wrapper for socantrcvmore_locked(); acquires the receive buffer
 * lock, which the locked variant releases before returning.
 */
void
socantrcvmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_rcv);
	socantrcvmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
119130831Srwatson
1201541Srgrimes/*
1211541Srgrimes * Wait for data to arrive at/drain from a socket buffer.
1221541Srgrimes */
/*
 * Sleep until data arrives at (or drains from) the socket buffer.  Sleeps
 * on &sb->sb_cc, the channel sowakeup() signals.  SB_NOINTR suppresses
 * PCATCH, making the sleep non-interruptible by signals.  Returns the
 * msleep_sbt() error (0, EINTR/ERESTART, or EWOULDBLOCK on timeout).
 * Caller must hold the socket buffer lock.
 */
int
sbwait(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_flags |= SB_WAIT;
	return (msleep_sbt(&sb->sb_cc, &sb->sb_mtx,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    sb->sb_timeo, 0, 0));
}
1341541Srgrimes
1351549Srgrimesint
136169236Srwatsonsblock(struct sockbuf *sb, int flags)
1371541Srgrimes{
1381541Srgrimes
139175845Srwatson	KASSERT((flags & SBL_VALID) == flags,
140175845Srwatson	    ("sblock: flags invalid (0x%x)", flags));
141175845Srwatson
142175845Srwatson	if (flags & SBL_WAIT) {
143175845Srwatson		if ((sb->sb_flags & SB_NOINTR) ||
144175845Srwatson		    (flags & SBL_NOINTR)) {
145170151Srwatson			sx_xlock(&sb->sb_sx);
146170151Srwatson			return (0);
147170151Srwatson		}
148170151Srwatson		return (sx_xlock_sig(&sb->sb_sx));
149169236Srwatson	} else {
150169236Srwatson		if (sx_try_xlock(&sb->sb_sx) == 0)
151169236Srwatson			return (EWOULDBLOCK);
152169236Srwatson		return (0);
1531541Srgrimes	}
1541541Srgrimes}
1551541Srgrimes
/*
 * Release the long-term (sx) lock on a socket buffer taken by sblock().
 */
void
sbunlock(struct sockbuf *sb)
{

	sx_xunlock(&sb->sb_sx);
}
162169236Srwatson
1631541Srgrimes/*
164160915Srwatson * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
165160915Srwatson * via SIGIO if the socket has the SS_ASYNC flag set.
166130831Srwatson *
167130831Srwatson * Called with the socket buffer lock held; will release the lock by the end
168130831Srwatson * of the function.  This allows the caller to acquire the socket buffer lock
169130831Srwatson * while testing for the need for various sorts of wakeup and hold it through
170130831Srwatson * to the point where it's no longer required.  We currently hold the lock
171130831Srwatson * through calls out to other subsystems (with the exception of kqueue), and
172130831Srwatson * then release it to avoid lock order issues.  It's not clear that's
173130831Srwatson * correct.
1741541Srgrimes */
/*
 * Wake up everything that may be waiting on the socket buffer: select/poll
 * waiters, sbwait() sleepers, kqueue knotes, the protocol upcall, AIO, and
 * (after the lock is dropped) SIGIO listeners.  Enters with the socket
 * buffer lock held and returns with it released.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
	int ret;

	SOCKBUF_LOCK_ASSERT(sb);

	selwakeuppri(&sb->sb_sel, PSOCK);
	/* Clear SB_SEL only once no selector remains interested. */
	if (!SEL_WAITING(&sb->sb_sel))
		sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		/* Matches the msleep_sbt() channel used in sbwait(). */
		wakeup(&sb->sb_cc);
	}
	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
	if (sb->sb_upcall != NULL) {
		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
		if (ret == SU_ISCONNECTED) {
			/* Only the receive buffer's upcall may report this. */
			KASSERT(sb == &so->so_rcv,
			    ("SO_SND upcall returned SU_ISCONNECTED"));
			soupcall_clear(so, SO_RCV);
		}
	} else
		ret = SU_OK;
	if (sb->sb_flags & SB_AIO)
		aio_swake(so, sb);
	SOCKBUF_UNLOCK(sb);
	/* soisconnected() must run without the buffer lock held. */
	if (ret == SU_ISCONNECTED)
		soisconnected(so);
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
2081541Srgrimes
2091541Srgrimes/*
2101541Srgrimes * Socket buffer (struct sockbuf) utility routines.
2111541Srgrimes *
212160915Srwatson * Each socket contains two socket buffers: one for sending data and one for
213160915Srwatson * receiving data.  Each buffer contains a queue of mbufs, information about
214160915Srwatson * the number of mbufs and amount of data in the queue, and other fields
215160915Srwatson * allowing select() statements and notification on data availability to be
216160915Srwatson * implemented.
2171541Srgrimes *
218160915Srwatson * Data stored in a socket buffer is maintained as a list of records.  Each
219160915Srwatson * record is a list of mbufs chained together with the m_next field.  Records
220160915Srwatson * are chained together with the m_nextpkt field. The upper level routine
221160915Srwatson * soreceive() expects the following conventions to be observed when placing
222160915Srwatson * information in the receive buffer:
2231541Srgrimes *
224160915Srwatson * 1. If the protocol requires each message be preceded by the sender's name,
225160915Srwatson *    then a record containing that name must be present before any
226160915Srwatson *    associated data (mbuf's must be of type MT_SONAME).
227160915Srwatson * 2. If the protocol supports the exchange of ``access rights'' (really just
228160915Srwatson *    additional data associated with the message), and there are ``rights''
229160915Srwatson *    to be received, then a record containing this data should be present
230160915Srwatson *    (mbuf's must be of type MT_RIGHTS).
231160915Srwatson * 3. If a name or rights record exists, then it must be followed by a data
232160915Srwatson *    record, perhaps of zero length.
2331541Srgrimes *
2341541Srgrimes * Before using a new socket structure it is first necessary to reserve
2351541Srgrimes * buffer space to the socket, by calling sbreserve().  This should commit
2361541Srgrimes * some of the available buffer space in the system buffer pool for the
237160915Srwatson * socket (currently, it does nothing but enforce limits).  The space should
238160915Srwatson * be released by calling sbrelease() when the socket is destroyed.
2391541Srgrimes */
/*
 * Reserve send and receive buffer space for a socket and establish sane
 * low-water marks.  Both buffer locks are taken, send before receive, and
 * held across the paired reservations so the operation is atomic with
 * respect to either buffer.  Returns 0 on success or ENOBUFS, undoing the
 * send-side reservation if only the receive side fails.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	struct thread *td = curthread;

	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
		goto bad;
	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
bad2:
	sbrelease_locked(&so->so_snd, so);
bad:
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (ENOBUFS);
}
2671541Srgrimes
268101996Sdgstatic int
269101996Sdgsysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
270101996Sdg{
271101996Sdg	int error = 0;
272162086Sjhb	u_long tmp_sb_max = sb_max;
273101996Sdg
274162086Sjhb	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
275101996Sdg	if (error || !req->newptr)
276101996Sdg		return (error);
277162086Sjhb	if (tmp_sb_max < MSIZE + MCLBYTES)
278101996Sdg		return (EINVAL);
279162086Sjhb	sb_max = tmp_sb_max;
280101996Sdg	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
281101996Sdg	return (0);
282101996Sdg}
283101996Sdg
2841541Srgrimes/*
285160915Srwatson * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
286160915Srwatson * become limiting if buffering efficiency is near the normal case.
2871541Srgrimes */
/*
 * Reserve cc bytes of space for a socket buffer, charging it against the
 * owning uid via chgsbsize().  Returns 1 on success and 0 if the request
 * exceeds sb_max_adj or the per-uid/per-thread limit.  Also scales mbmax by
 * sb_efficiency so mbuf accounting does not become the limiting factor in
 * the common case.  Caller must hold the socket buffer lock.
 */
int
sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	rlim_t sbsize_limit;

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * When a thread is passed, we take into account the thread's socket
	 * buffer size limit.  The caller will generally pass curthread, but
	 * in the TCP input path, NULL will be passed to indicate that no
	 * appropriate thread resource limits are available.  In that case,
	 * we don't apply a process limit.
	 */
	if (cc > sb_max_adj)
		return (0);
	if (td != NULL) {
		PROC_LOCK(td->td_proc);
		sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
		PROC_UNLOCK(td->td_proc);
	} else
		sbsize_limit = RLIM_INFINITY;
	/* chgsbsize() updates sb_hiwat and enforces the uid's total. */
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (0);
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
3191541Srgrimes
320131006Srwatsonint
321160915Srwatsonsbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
322160915Srwatson    struct thread *td)
323131006Srwatson{
324131006Srwatson	int error;
325131006Srwatson
326131006Srwatson	SOCKBUF_LOCK(sb);
327131006Srwatson	error = sbreserve_locked(sb, cc, so, td);
328131006Srwatson	SOCKBUF_UNLOCK(sb);
329131006Srwatson	return (error);
330131006Srwatson}
331131006Srwatson
3321541Srgrimes/*
3331541Srgrimes * Free mbufs held by a socket, and reserved mbuf space.
3341541Srgrimes */
/*
 * Free mbufs held by a socket buffer and return its reserved space to the
 * owning uid's accounting (chgsbsize() with a new size of 0).  Common body
 * shared by sbrelease_locked() and sbdestroy().
 */
void
sbrelease_internal(struct sockbuf *sb, struct socket *so)
{

	sbflush_internal(sb);
	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}
344160875Srwatson
/*
 * Release a socket buffer's mbufs and reservation; caller must hold the
 * socket buffer lock.
 */
void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbrelease_internal(sb, so);
}
3531541Srgrimes
/*
 * Unlocked wrapper for sbrelease_locked().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}
362160875Srwatson
/*
 * Tear down a socket buffer without taking its lock; used when the socket
 * is being destroyed and no other references can exist.
 */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
}
369160875Srwatson
3701541Srgrimes/*
371160915Srwatson * Routines to add and remove data from an mbuf queue.
3721541Srgrimes *
373160915Srwatson * The routines sbappend() or sbappendrecord() are normally called to append
374160915Srwatson * new mbufs to a socket buffer, after checking that adequate space is
375160915Srwatson * available, comparing the function sbspace() with the amount of data to be
376160915Srwatson * added.  sbappendrecord() differs from sbappend() in that data supplied is
377160915Srwatson * treated as the beginning of a new record.  To place a sender's address,
378160915Srwatson * optional access rights, and data in a socket receive buffer,
379160915Srwatson * sbappendaddr() should be used.  To place access rights and data in a
380160915Srwatson * socket receive buffer, sbappendrights() should be used.  In either case,
381160915Srwatson * the new data begins a new record.  Note that unlike sbappend() and
382160915Srwatson * sbappendrecord(), these routines check for the caller that there will be
383160915Srwatson * enough space to store the data.  Each fails if there is not enough space,
384160915Srwatson * or if it cannot find mbufs to store additional information in.
3851541Srgrimes *
386160915Srwatson * Reliable protocols may use the socket send buffer to hold data awaiting
387160915Srwatson * acknowledgement.  Data is normally copied from a socket send buffer in a
388160915Srwatson * protocol with m_copy for output to a peer, and then removing the data from
389160915Srwatson * the socket buffer with sbdrop() or sbdroprecord() when the data is
390160915Srwatson * acknowledged by the peer.
3911541Srgrimes */
392121628Ssam#ifdef SOCKBUF_DEBUG
393121628Ssamvoid
394121628Ssamsblastrecordchk(struct sockbuf *sb, const char *file, int line)
395121628Ssam{
396121628Ssam	struct mbuf *m = sb->sb_mb;
397121628Ssam
398130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
399130831Srwatson
400121628Ssam	while (m && m->m_nextpkt)
401121628Ssam		m = m->m_nextpkt;
402121628Ssam
403121628Ssam	if (m != sb->sb_lastrecord) {
404121628Ssam		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
405121628Ssam			__func__, sb->sb_mb, sb->sb_lastrecord, m);
406121628Ssam		printf("packet chain:\n");
407121628Ssam		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
408121628Ssam			printf("\t%p\n", m);
409121628Ssam		panic("%s from %s:%u", __func__, file, line);
410121628Ssam	}
411121628Ssam}
412121628Ssam
413121628Ssamvoid
414121628Ssamsblastmbufchk(struct sockbuf *sb, const char *file, int line)
415121628Ssam{
416121628Ssam	struct mbuf *m = sb->sb_mb;
417121628Ssam	struct mbuf *n;
418121628Ssam
419130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
420130831Srwatson
421121628Ssam	while (m && m->m_nextpkt)
422121628Ssam		m = m->m_nextpkt;
423121628Ssam
424121628Ssam	while (m && m->m_next)
425121628Ssam		m = m->m_next;
426121628Ssam
427121628Ssam	if (m != sb->sb_mbtail) {
428121628Ssam		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
429121628Ssam			__func__, sb->sb_mb, sb->sb_mbtail, m);
430121628Ssam		printf("packet tree:\n");
431121628Ssam		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
432121628Ssam			printf("\t");
433121628Ssam			for (n = m; n != NULL; n = n->m_next)
434121628Ssam				printf("%p ", n);
435121628Ssam			printf("\n");
436121628Ssam		}
437121628Ssam		panic("%s from %s:%u", __func__, file, line);
438121628Ssam	}
439121628Ssam}
440121628Ssam#endif /* SOCKBUF_DEBUG */
441121628Ssam
/*
 * Link record m0 onto the tail of socket buffer sb's record chain,
 * updating sb_lastrecord (and sb_mb when the buffer was empty).  Caller
 * must hold the socket buffer lock.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
450121628Ssam
4511541Srgrimes/*
452160915Srwatson * Append mbuf chain m to the last record in the socket buffer sb.  The
453160915Srwatson * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
454160915Srwatson * are discarded and mbufs are compacted where possible.
4551541Srgrimes */
4561549Srgrimesvoid
457160915Srwatsonsbappend_locked(struct sockbuf *sb, struct mbuf *m)
4581541Srgrimes{
459160915Srwatson	struct mbuf *n;
4601541Srgrimes
461130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
462130831Srwatson
4631541Srgrimes	if (m == 0)
4641541Srgrimes		return;
465130831Srwatson
466121628Ssam	SBLASTRECORDCHK(sb);
4673308Sphk	n = sb->sb_mb;
4683308Sphk	if (n) {
4691541Srgrimes		while (n->m_nextpkt)
4701541Srgrimes			n = n->m_nextpkt;
4711541Srgrimes		do {
4721541Srgrimes			if (n->m_flags & M_EOR) {
473130831Srwatson				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
4741541Srgrimes				return;
4751541Srgrimes			}
4761541Srgrimes		} while (n->m_next && (n = n->m_next));
477121628Ssam	} else {
478121628Ssam		/*
479121628Ssam		 * XXX Would like to simply use sb_mbtail here, but
480121628Ssam		 * XXX I need to verify that I won't miss an EOR that
481121628Ssam		 * XXX way.
482121628Ssam		 */
483121628Ssam		if ((n = sb->sb_lastrecord) != NULL) {
484121628Ssam			do {
485121628Ssam				if (n->m_flags & M_EOR) {
486130831Srwatson					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
487121628Ssam					return;
488121628Ssam				}
489121628Ssam			} while (n->m_next && (n = n->m_next));
490121628Ssam		} else {
491121628Ssam			/*
492121628Ssam			 * If this is the first record in the socket buffer,
493121628Ssam			 * it's also the last record.
494121628Ssam			 */
495121628Ssam			sb->sb_lastrecord = m;
496121628Ssam		}
4971541Srgrimes	}
4981541Srgrimes	sbcompress(sb, m, n);
499121628Ssam	SBLASTRECORDCHK(sb);
5001541Srgrimes}
5011541Srgrimes
502121628Ssam/*
503160915Srwatson * Append mbuf chain m to the last record in the socket buffer sb.  The
504160915Srwatson * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
505160915Srwatson * are discarded and mbufs are compacted where possible.
506130831Srwatson */
/*
 * Unlocked wrapper for sbappend_locked().
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
515130831Srwatson
516130831Srwatson/*
517160915Srwatson * This version of sbappend() should only be used when the caller absolutely
518160915Srwatson * knows that there will never be more than one record in the socket buffer,
519160915Srwatson * that is, a stream protocol (such as TCP).
520121628Ssam */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
	SOCKBUF_LOCK_ASSERT(sb);

	/* Stream sockets hold exactly one record; enforce that invariant. */
	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	/* Remove all packet headers and mbuf tags to get a pure data chain. */
	m_demote(m, 1);

	/* Append to the tail of the single record. */
	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}
539121628Ssam
540130831Srwatson/*
541160915Srwatson * This version of sbappend() should only be used when the caller absolutely
542160915Srwatson * knows that there will never be more than one record in the socket buffer,
543160915Srwatson * that is, a stream protocol (such as TCP).
544130831Srwatson */
/*
 * Unlocked wrapper for sbappendstream_locked().
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
553130831Srwatson
5541541Srgrimes#ifdef SOCKBUF_DEBUG
5551549Srgrimesvoid
556160915Srwatsonsbcheck(struct sockbuf *sb)
5571541Srgrimes{
558103554Sphk	struct mbuf *m;
559103554Sphk	struct mbuf *n = 0;
560103554Sphk	u_long len = 0, mbcnt = 0;
5611541Srgrimes
562130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
563130831Srwatson
56440913Sfenner	for (m = sb->sb_mb; m; m = n) {
56540913Sfenner	    n = m->m_nextpkt;
56640913Sfenner	    for (; m; m = m->m_next) {
5671541Srgrimes		len += m->m_len;
5681541Srgrimes		mbcnt += MSIZE;
56917675Sjulian		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
5701541Srgrimes			mbcnt += m->m_ext.ext_size;
57140913Sfenner	    }
5721541Srgrimes	}
5731541Srgrimes	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
574121628Ssam		printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
5751541Srgrimes		    mbcnt, sb->sb_mbcnt);
5761541Srgrimes		panic("sbcheck");
5771541Srgrimes	}
5781541Srgrimes}
5791541Srgrimes#endif
5801541Srgrimes
5811541Srgrimes/*
582160915Srwatson * As above, except the mbuf chain begins a new record.
5831541Srgrimes */
5841549Srgrimesvoid
585160915Srwatsonsbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
5861541Srgrimes{
587160915Srwatson	struct mbuf *m;
5881541Srgrimes
589130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
590130831Srwatson
5911541Srgrimes	if (m0 == 0)
5921541Srgrimes		return;
5931541Srgrimes	/*
594160915Srwatson	 * Put the first mbuf on the queue.  Note this permits zero length
595160915Srwatson	 * records.
5961541Srgrimes	 */
5971541Srgrimes	sballoc(sb, m0);
598121628Ssam	SBLASTRECORDCHK(sb);
599121628Ssam	SBLINKRECORD(sb, m0);
600191366Semax	sb->sb_mbtail = m0;
6011541Srgrimes	m = m0->m_next;
6021541Srgrimes	m0->m_next = 0;
6031541Srgrimes	if (m && (m0->m_flags & M_EOR)) {
6041541Srgrimes		m0->m_flags &= ~M_EOR;
6051541Srgrimes		m->m_flags |= M_EOR;
6061541Srgrimes	}
607191366Semax	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
6081541Srgrimes	sbcompress(sb, m, m0);
6091541Srgrimes}
6101541Srgrimes
6111541Srgrimes/*
612160915Srwatson * As above, except the mbuf chain begins a new record.
613130831Srwatson */
/*
 * Unlocked wrapper for sbappendrecord_locked().
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}
622130831Srwatson
/*
 * Helper routine that appends data, control, and address to a sockbuf.
 * Builds a new record laid out as address mbuf -> control chain -> data
 * chain, charges every mbuf to the buffer, and links the record in.
 * ctrl_last is the last mbuf of the control chain (NULL when control is
 * NULL); in that case the data chain m0 takes control's place.  Returns 1
 * on success, 0 if the address does not fit in an mbuf or no mbuf could be
 * allocated.
 */
static int
sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
{
	struct mbuf *m, *n, *nlast;
#if MSIZE <= 256
	/* The address must fit within a single (non-cluster) mbuf. */
	if (asa->sa_len > MLEN)
		return (0);
#endif
	m = m_get(M_NOWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (ctrl_last)
		ctrl_last->m_next = m0;	/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Charge every mbuf in the new record; n ends on the last one. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}
6551541Srgrimes
656130831Srwatson/*
657160915Srwatson * Append address and data, and optionally, control (ancillary) data to the
658160915Srwatson * receive queue of a socket.  If present, m0 must include a packet header
659160915Srwatson * with total length.  Returns 0 if no space in sockbuf or insufficient
660160915Srwatson * mbufs.
661130831Srwatson */
int
sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *ctrl_last;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	/* If data is supplied it must carry a packet header with its length. */
	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	if (m0)
		space += m0->m_pkthdr.len;
	/* m_length() also yields the control chain's last mbuf. */
	space += m_length(control, &ctrl_last);

	if (space > sbspace(sb))
		return (0);
	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
}
681263820Sasomers
682263820Sasomers/*
683263820Sasomers * Append address and data, and optionally, control (ancillary) data to the
684263820Sasomers * receive queue of a socket.  If present, m0 must include a packet header
685263820Sasomers * with total length.  Returns 0 if insufficient mbufs.  Does not validate space
686263820Sasomers * on the receiving sockbuf.
687263820Sasomers */
688263820Sasomersint
689263820Sasomerssbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
690263820Sasomers    struct mbuf *m0, struct mbuf *control)
691263820Sasomers{
692263820Sasomers	struct mbuf *ctrl_last;
693263820Sasomers
694263820Sasomers	SOCKBUF_LOCK_ASSERT(sb);
695263820Sasomers
696263820Sasomers	ctrl_last = (control == NULL) ? NULL : m_last(control);
697263820Sasomers	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
698263820Sasomers}
699263820Sasomers
700263820Sasomers/*
701263820Sasomers * Append address and data, and optionally, control (ancillary) data to the
702263820Sasomers * receive queue of a socket.  If present, m0 must include a packet header
703263820Sasomers * with total length.  Returns 0 if no space in sockbuf or insufficient
704263820Sasomers * mbufs.
705263820Sasomers */
/*
 * Unlocked wrapper for sbappendaddr_locked().
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}
717130831Srwatson
718130831Srwatsonint
719160915Srwatsonsbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
720160915Srwatson    struct mbuf *control)
7211541Srgrimes{
722121628Ssam	struct mbuf *m, *n, *mlast;
723103554Sphk	int space;
7241541Srgrimes
725130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
726130831Srwatson
7271541Srgrimes	if (control == 0)
728130831Srwatson		panic("sbappendcontrol_locked");
729103554Sphk	space = m_length(control, &n) + m_length(m0, NULL);
730130831Srwatson
7311541Srgrimes	if (space > sbspace(sb))
7321541Srgrimes		return (0);
7331541Srgrimes	n->m_next = m0;			/* concatenate data to control */
734121628Ssam
735121628Ssam	SBLASTRECORDCHK(sb);
736121628Ssam
737121628Ssam	for (m = control; m->m_next; m = m->m_next)
7381541Srgrimes		sballoc(sb, m);
739121628Ssam	sballoc(sb, m);
740121628Ssam	mlast = m;
741121628Ssam	SBLINKRECORD(sb, control);
742121628Ssam
743121628Ssam	sb->sb_mbtail = mlast;
744121628Ssam	SBLASTMBUFCHK(sb);
745121628Ssam
746121628Ssam	SBLASTRECORDCHK(sb);
7471541Srgrimes	return (1);
7481541Srgrimes}
7491541Srgrimes
/*
 * Unlocked wrapper for sbappendcontrol_locked().
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendcontrol_locked(sb, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}
760130831Srwatson
7611541Srgrimes/*
762150280Srwatson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
763150280Srwatson * (n).  If (n) is NULL, the buffer is presumed empty.
764150280Srwatson *
765150280Srwatson * When the data is compressed, mbufs in the chain may be handled in one of
766150280Srwatson * three ways:
767150280Srwatson *
768150280Srwatson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
769150280Srwatson *     record boundary, and no change in data type).
770150280Srwatson *
771150280Srwatson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
772150280Srwatson *     an mbuf already in the socket buffer.  This can occur if an
773150280Srwatson *     appropriate mbuf exists, there is room, and no merging of data types
774150280Srwatson *     will occur.
775150280Srwatson *
776150280Srwatson * (3) The mbuf may be appended to the end of the existing mbuf chain.
777150280Srwatson *
778150280Srwatson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
779150280Srwatson * end-of-record.
7801541Srgrimes */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;		/* accumulated M_EOR flag, re-applied at end */
	struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Case (1): drop an empty mbuf, but only if doing so loses
		 * no information -- either no end-of-record is pending, or
		 * the successor (m->m_next, falling back to n when m is the
		 * last of the chain) carries the same data type.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Case (2): coalesce -- copy m's data into the tail of n
		 * when n is writable, not an end-of-record, of the same
		 * type, has room, and coalescing is not disabled for this
		 * socket buffer.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Case (3): append m to the end of the existing chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		/* M_EOR is cleared here; restored on the final mbuf below. */
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		/* Re-mark the last mbuf appended as end-of-record. */
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}
8331541Srgrimes
8341541Srgrimes/*
835160915Srwatson * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
8361541Srgrimes */
837160875Srwatsonstatic void
838160915Srwatsonsbflush_internal(struct sockbuf *sb)
8391541Srgrimes{
8401541Srgrimes
84151757Spb	while (sb->sb_mbcnt) {
84251757Spb		/*
84351757Spb		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
84451757Spb		 * we would loop forever. Panic instead.
84551757Spb		 */
84651757Spb		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
84751757Spb			break;
848256185Sglebius		m_freem(sbcut_internal(sb, (int)sb->sb_cc));
84951757Spb	}
85040913Sfenner	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
851160915Srwatson		panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
852160915Srwatson		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
8531541Srgrimes}
8541541Srgrimes
void
sbflush_locked(struct sockbuf *sb)
{

	/* Locked entry point: the caller already holds the sockbuf lock. */
	SOCKBUF_LOCK_ASSERT(sb);
	sbflush_internal(sb);
}
862160875Srwatson
void
sbflush(struct sockbuf *sb)
{

	/* Unlocked wrapper: acquire the lock around the flush. */
	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
871130831Srwatson
8721541Srgrimes/*
873256185Sglebius * Cut data from (the front of) a sockbuf.
8741541Srgrimes */
875256185Sglebiusstatic struct mbuf *
876256185Sglebiussbcut_internal(struct sockbuf *sb, int len)
8771541Srgrimes{
878256185Sglebius	struct mbuf *m, *n, *next, *mfree;
8791541Srgrimes
8801541Srgrimes	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
881256185Sglebius	mfree = NULL;
882256185Sglebius
8831541Srgrimes	while (len > 0) {
8841541Srgrimes		if (m == 0) {
8851541Srgrimes			if (next == 0)
8861541Srgrimes				panic("sbdrop");
8871541Srgrimes			m = next;
8881541Srgrimes			next = m->m_nextpkt;
8891541Srgrimes			continue;
8901541Srgrimes		}
8911541Srgrimes		if (m->m_len > len) {
8921541Srgrimes			m->m_len -= len;
8931541Srgrimes			m->m_data += len;
8941541Srgrimes			sb->sb_cc -= len;
895167715Sandre			if (sb->sb_sndptroff != 0)
896167715Sandre				sb->sb_sndptroff -= len;
897151967Sandre			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
898106473Skbyanc				sb->sb_ctl -= len;
8991541Srgrimes			break;
9001541Srgrimes		}
9011541Srgrimes		len -= m->m_len;
9021541Srgrimes		sbfree(sb, m);
903256185Sglebius		n = m->m_next;
904256185Sglebius		m->m_next = mfree;
905256185Sglebius		mfree = m;
906256185Sglebius		m = n;
9071541Srgrimes	}
9081541Srgrimes	while (m && m->m_len == 0) {
9091541Srgrimes		sbfree(sb, m);
910256185Sglebius		n = m->m_next;
911256185Sglebius		m->m_next = mfree;
912256185Sglebius		mfree = m;
913256185Sglebius		m = n;
9141541Srgrimes	}
9151541Srgrimes	if (m) {
9161541Srgrimes		sb->sb_mb = m;
9171541Srgrimes		m->m_nextpkt = next;
9181541Srgrimes	} else
9191541Srgrimes		sb->sb_mb = next;
920121628Ssam	/*
921160915Srwatson	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
922160915Srwatson	 * sb_lastrecord is up-to-date if we dropped part of the last record.
923121628Ssam	 */
924121628Ssam	m = sb->sb_mb;
925121628Ssam	if (m == NULL) {
926121628Ssam		sb->sb_mbtail = NULL;
927121628Ssam		sb->sb_lastrecord = NULL;
928121628Ssam	} else if (m->m_nextpkt == NULL) {
929121628Ssam		sb->sb_lastrecord = m;
930121628Ssam	}
931256185Sglebius
932256185Sglebius	return (mfree);
9331541Srgrimes}
9341541Srgrimes
/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{
	struct mbuf *chain;

	SOCKBUF_LOCK_ASSERT(sb);
	/* Detach the leading bytes and free the resulting chain. */
	chain = sbcut_internal(sb, len);
	m_freem(chain);
}
945160875Srwatson
/*
 * Drop data from (the front of) a sockbuf and hand the detached mbuf
 * chain back to the caller instead of freeing it.
 */
struct mbuf *
sbcut_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);
	return (sbcut_internal(sb, len));
}
957160875Srwatson
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *freechain;

	/*
	 * Detach the bytes under the lock, but defer freeing the mbufs
	 * until after the lock has been released.
	 */
	SOCKBUF_LOCK(sb);
	freechain = sbcut_internal(sb, len);
	SOCKBUF_UNLOCK(sb);

	m_freem(freechain);
}
969130831Srwatson
/*
 * Maintain a pointer and offset pair into the socket buffer mbuf chain to
 * avoid traversal of the entire socket buffer for larger offsets.
 *
 * Returns the mbuf containing byte 'off' of the socket buffer and stores
 * that byte's offset within the returned mbuf in *moff.  The cached
 * sb_sndptr/sb_sndptroff pair is advanced towards off + len so the next
 * in-order call starts close to its data.
 */
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
{
	struct mbuf *m, *ret;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
	KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
	KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));

	/*
	 * Is off below stored offset? Happens on retransmits.
	 * Just return, we can't help here.
	 */
	if (sb->sb_sndptroff > off) {
		*moff = off;
		return (sb->sb_mb);
	}

	/* Return closest mbuf in chain for current offset. */
	*moff = off - sb->sb_sndptroff;
	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;
	/* If the cached mbuf is fully consumed, step to its successor. */
	if (*moff == m->m_len) {
		*moff = 0;
		sb->sb_sndptroff += m->m_len;
		m = ret = m->m_next;
		KASSERT(ret->m_len > 0,
		    ("mbuf %p in sockbuf %p chain has no valid data", ret, sb));
	}

	/* Advance by len to be as close as possible for the next transmit. */
	for (off = off - sb->sb_sndptroff + len - 1;
	     off > 0 && m != NULL && off >= m->m_len;
	     m = m->m_next) {
		sb->sb_sndptroff += m->m_len;
		off -= m->m_len;
	}
	if (off > 0 && m == NULL)
		panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret);
	sb->sb_sndptr = m;

	return (ret);
}
1016167715Sandre
1017167715Sandre/*
1018160915Srwatson * Drop a record off the front of a sockbuf and move the next record to the
1019160915Srwatson * front.
10201541Srgrimes */
10211549Srgrimesvoid
1022160915Srwatsonsbdroprecord_locked(struct sockbuf *sb)
10231541Srgrimes{
1024160915Srwatson	struct mbuf *m;
10251541Srgrimes
1026130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
1027130831Srwatson
10281541Srgrimes	m = sb->sb_mb;
10291541Srgrimes	if (m) {
10301541Srgrimes		sb->sb_mb = m->m_nextpkt;
10311541Srgrimes		do {
10321541Srgrimes			sbfree(sb, m);
103390227Sdillon			m = m_free(m);
10343308Sphk		} while (m);
10351541Srgrimes	}
1036121628Ssam	SB_EMPTY_FIXUP(sb);
10371541Srgrimes}
103817047Swollman
/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	/* Unlocked wrapper around sbdroprecord_locked(). */
	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1051130831Srwatson
1052167895Srwatson/*
1053167902Srwatson * Create a "control" mbuf containing the specified data with the specified
1054167902Srwatson * type for presentation on a socket buffer.
1055167895Srwatson */
1056167895Srwatsonstruct mbuf *
1057169624Srwatsonsbcreatecontrol(caddr_t p, int size, int type, int level)
1058167895Srwatson{
1059169624Srwatson	struct cmsghdr *cp;
1060167895Srwatson	struct mbuf *m;
1061167895Srwatson
1062167895Srwatson	if (CMSG_SPACE((u_int)size) > MCLBYTES)
1063167895Srwatson		return ((struct mbuf *) NULL);
1064167895Srwatson	if (CMSG_SPACE((u_int)size) > MLEN)
1065243882Sglebius		m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
1066167895Srwatson	else
1067243882Sglebius		m = m_get(M_NOWAIT, MT_CONTROL);
1068167895Srwatson	if (m == NULL)
1069167895Srwatson		return ((struct mbuf *) NULL);
1070167895Srwatson	cp = mtod(m, struct cmsghdr *);
1071167895Srwatson	m->m_len = 0;
1072167895Srwatson	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
1073167895Srwatson	    ("sbcreatecontrol: short mbuf"));
1074268432Sdelphij	/*
1075268432Sdelphij	 * Don't leave the padding between the msg header and the
1076268432Sdelphij	 * cmsg data and the padding after the cmsg data un-initialized.
1077268432Sdelphij	 */
1078268432Sdelphij	bzero(cp, CMSG_SPACE((u_int)size));
1079167895Srwatson	if (p != NULL)
1080167895Srwatson		(void)memcpy(CMSG_DATA(cp), p, size);
1081167895Srwatson	m->m_len = CMSG_SPACE(size);
1082167895Srwatson	cp->cmsg_len = CMSG_LEN(size);
1083167895Srwatson	cp->cmsg_level = level;
1084167895Srwatson	cp->cmsg_type = type;
1085167895Srwatson	return (m);
1086167895Srwatson}
1087167895Srwatson
1088167895Srwatson/*
1089167902Srwatson * This does the same for socket buffers that sotoxsocket does for sockets:
1090167902Srwatson * generate an user-format data structure describing the socket buffer.  Note
1091167902Srwatson * that the xsockbuf structure, since it is always embedded in a socket, does
1092167902Srwatson * not include a self pointer nor a length.  We make this entry point public
1093167902Srwatson * in case some other mechanism needs it.
1094167895Srwatson */
1095167895Srwatsonvoid
1096167895Srwatsonsbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1097167895Srwatson{
1098169624Srwatson
1099167895Srwatson	xsb->sb_cc = sb->sb_cc;
1100167895Srwatson	xsb->sb_hiwat = sb->sb_hiwat;
1101167895Srwatson	xsb->sb_mbcnt = sb->sb_mbcnt;
1102179027Sgnn	xsb->sb_mcnt = sb->sb_mcnt;
1103179027Sgnn	xsb->sb_ccnt = sb->sb_ccnt;
1104167895Srwatson	xsb->sb_mbmax = sb->sb_mbmax;
1105167895Srwatson	xsb->sb_lowat = sb->sb_lowat;
1106167895Srwatson	xsb->sb_flags = sb->sb_flags;
1107167895Srwatson	xsb->sb_timeo = sb->sb_timeo;
1108167895Srwatson}
1109167895Srwatson
/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
/* kern.ipc.maxsockbuf: writes are filtered through sysctl_handle_sb_max(). */
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
    &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
/*
 * kern.ipc.sockbuf_waste_factor: NOTE(review): sb_efficiency appears to
 * scale the mbuf space allowance relative to the data limit -- confirm
 * against its use in the reserve path.
 */
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "Socket buffer size waste factor");
1117