/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/uipc_sockbuf.c 228449 2011-12-13 00:38:50Z eadler $");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/aio.h> /* for aio_swake proto */
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

/*
 * Function pointer set by the AIO routines so that the socket buffer code
 * can call back into the AIO module if it is loaded.
 */
void	(*aio_swake)(struct socket *, struct sockbuf *);

/*
 * Primitive routines for operating on socket buffers
 */

u_long	sb_max = SB_MAX;
u_long sb_max_adj =
       (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
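
/*
 * Note on the initializer above: MCLBYTES / (MSIZE + MCLBYTES) is the
 * fraction of a cluster-backed mbuf that holds payload, so sb_max_adj is
 * the largest amount of user data that fits within sb_max bytes of mbuf
 * and cluster storage in the worst case.  sbreserve_locked() compares
 * requested sizes against sb_max_adj rather than sb_max for this reason.
 */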

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

static void	sbdrop_internal(struct sockbuf *sb, int len);
static void	sbflush_internal(struct sockbuf *sb);

/*
 * Socantsendmore indicates that no more data will be sent on the socket; it
 * would normally be applied to a socket by the protocol code when the user
 * informs the system that no more data is to be sent (in the case of
 * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
 * received, and will normally be applied to the socket by a protocol when it
 * detects that the peer will send no more data.  Data queued for reading in
 * the socket may yet be read.
 */
void
socantsendmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_snd);

	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	sowwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantsendmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_snd);
	socantsendmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantrcvmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

void
socantrcvmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_rcv);
	socantrcvmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_flags |= SB_WAIT;
	return (msleep(&sb->sb_cc, &sb->sb_mtx,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    sb->sb_timeo));
}
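
/*
 * Example (illustrative only): a typical sbwait() caller holds the socket
 * buffer lock and loops until data arrives or an error occurs, along the
 * lines of:
 *
 *	SOCKBUF_LOCK(&so->so_rcv);
 *	while (so->so_rcv.sb_cc == 0 && error == 0)
 *		error = sbwait(&so->so_rcv);
 *	SOCKBUF_UNLOCK(&so->so_rcv);
 *
 * Real callers such as soreceive() check additional conditions (low water
 * marks, SBS_CANTRCVMORE, non-blocking I/O) before sleeping.
 */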

int
sblock(struct sockbuf *sb, int flags)
{

	KASSERT((flags & SBL_VALID) == flags,
	    ("sblock: flags invalid (0x%x)", flags));

	if (flags & SBL_WAIT) {
		if ((sb->sb_flags & SB_NOINTR) ||
		    (flags & SBL_NOINTR)) {
			sx_xlock(&sb->sb_sx);
			return (0);
		}
		return (sx_xlock_sig(&sb->sb_sx));
	} else {
		if (sx_try_xlock(&sb->sb_sx) == 0)
			return (EWOULDBLOCK);
		return (0);
	}
}

void
sbunlock(struct sockbuf *sb)
{

	sx_xunlock(&sb->sb_sx);
}
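
/*
 * Illustrative pairing of sblock()/sbunlock(): soreceive() and sosend()
 * take the sockbuf sx lock around a whole I/O operation so that the mbuf
 * chain is not spliced by a concurrent caller, roughly:
 *
 *	error = sblock(&so->so_rcv, (flags & MSG_DONTWAIT) ? 0 : SBL_WAIT);
 *	if (error)
 *		return (error);
 *	... copy data out, possibly sleeping in sbwait() ...
 *	sbunlock(&so->so_rcv);
 *
 * Note that this sx lock is distinct from the sockbuf mutex asserted by
 * most routines in this file.
 */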

/*
 * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
 * via SIGIO if the socket has the SS_ASYNC flag set.
 *
 * Called with the socket buffer lock held; will release the lock by the end
 * of the function.  This allows the caller to acquire the socket buffer lock
 * while testing for the need for various sorts of wakeup and hold it through
 * to the point where it's no longer required.  We currently hold the lock
 * through calls out to other subsystems (with the exception of kqueue), and
 * then release it to avoid lock order issues.  It's not clear that's
 * correct.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
	int ret;

	SOCKBUF_LOCK_ASSERT(sb);

	selwakeuppri(&sb->sb_sel, PSOCK);
	if (!SEL_WAITING(&sb->sb_sel))
		sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_cc);
	}
	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
	if (sb->sb_upcall != NULL) {
		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_DONTWAIT);
		if (ret == SU_ISCONNECTED) {
			KASSERT(sb == &so->so_rcv,
			    ("SO_SND upcall returned SU_ISCONNECTED"));
			soupcall_clear(so, SO_RCV);
		}
	} else
		ret = SU_OK;
	if (sb->sb_flags & SB_AIO)
		aio_swake(so, sb);
	SOCKBUF_UNLOCK(sb);
	if (ret == SU_ISCONNECTED)
		soisconnected(so);
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
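
/*
 * sowakeup() is normally reached through the sorwakeup(), sowwakeup(),
 * sorwakeup_locked() and sowwakeup_locked() macros (see sys/socketvar.h),
 * as is done by socantsendmore_locked() and socantrcvmore_locked() above,
 * rather than being called directly.
 */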

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and one for
 * receiving data.  Each buffer contains a queue of mbufs, information about
 * the number of mbufs and amount of data in the queue, and other fields
 * allowing select() statements and notification on data availability to be
 * implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.  Each
 * record is a list of mbufs chained together with the m_next field.  Records
 * are chained together with the m_nextpkt field. The upper level routine
 * soreceive() expects the following conventions to be observed when placing
 * information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's name,
 *    then a record containing that name must be present before any
 *    associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really just
 *    additional data associated with the message), and there are ``rights''
 *    to be received, then a record containing this data should be present
 *    (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by a data
 *    record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space should
 * be released by calling sbrelease() when the socket is destroyed.
 */
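
/*
 * For example (sketch only; variable names are protocol-specific), a
 * datagram protocol's attach routine typically reserves its default buffer
 * sizes with something like:
 *
 *	error = soreserve(so, udp_sendspace, udp_recvspace);
 *	if (error)
 *		return (error);
 *
 * after which the limits may still be adjusted with setsockopt(2)
 * SO_SNDBUF/SO_RCVBUF, subject to sb_max.
 */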
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	struct thread *td = curthread;

	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
		goto bad;
	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
bad2:
	sbrelease_locked(&so->so_snd, so);
bad:
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (ENOBUFS);
}

static int
sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	u_long tmp_sb_max = sb_max;

	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
	if (error || !req->newptr)
		return (error);
	if (tmp_sb_max < MSIZE + MCLBYTES)
		return (EINVAL);
	sb_max = tmp_sb_max;
	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
	return (0);
}
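
/*
 * sysctl_handle_sb_max() backs the kern.ipc.maxsockbuf sysctl declared at
 * the bottom of this file; e.g. "sysctl kern.ipc.maxsockbuf=8388608" from
 * userland raises the limit and recomputes sb_max_adj to match.
 */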

/*
 * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
 * become limiting if buffering efficiency is near the normal case.
 */
int
sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	rlim_t sbsize_limit;

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * When a thread is passed, we take into account the thread's socket
	 * buffer size limit.  The caller will generally pass curthread, but
	 * in the TCP input path, NULL will be passed to indicate that no
	 * appropriate thread resource limits are available.  In that case,
	 * we don't apply a process limit.
	 */
	if (cc > sb_max_adj)
		return (0);
	if (td != NULL) {
		PROC_LOCK(td->td_proc);
		sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
		PROC_UNLOCK(td->td_proc);
	} else
		sbsize_limit = RLIM_INFINITY;
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (0);
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
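
/*
 * Worked example for the sb_mbmax computation in sbreserve_locked(): with
 * the default sb_efficiency of 8, reserving cc = 64k of data space sets
 * sb_mbmax to min(512k, sb_max), leaving headroom for mbuf and cluster
 * overhead when small packets make storage use much larger than the
 * payload actually buffered.
 */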

int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	int error;

	SOCKBUF_LOCK(sb);
	error = sbreserve_locked(sb, cc, so, td);
	SOCKBUF_UNLOCK(sb);
	return (error);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease_internal(struct sockbuf *sb, struct socket *so)
{

	sbflush_internal(sb);
	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbrelease_internal(sb, so);
}

void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}

void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
}

/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added.  sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendrights() should be used.  In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data.  Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  Data is normally copied from a socket send buffer in a
 * protocol with m_copy for output to a peer, and then removed from the
 * socket buffer with sbdrop() or sbdroprecord() when the data is
 * acknowledged by the peer.
 */
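
/*
 * As a rough guide to which append routine protocols use (illustrative,
 * not exhaustive): stream protocols such as TCP append received data with
 * sbappendstream_locked() on so_rcv and queue transmit data with
 * sbappendstream() on so_snd, while datagram protocols such as UDP use
 * sbappendaddr_locked() to prepend the peer's address (and any control
 * mbufs) to each record placed in so_rcv.
 */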
#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
			__func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("%s from %s:%u", __func__, file, line);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
			__func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("%s from %s:%u", __func__, file, line);
	}
}
#endif /* SOCKBUF_DEBUG */

#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the socket buffer sb.  The
 * additional space associated with the mbuf chain is recorded in sb.  Empty
 * mbufs are discarded and mbufs are compacted where possible.
 */
void
sbappend_locked(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m == 0)
		return;

	SBLASTRECORDCHK(sb);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		if ((n = sb->sb_lastrecord) != NULL) {
			do {
				if (n->m_flags & M_EOR) {
					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
					return;
				}
			} while (n->m_next && (n = n->m_next));
		} else {
			/*
			 * If this is the first record in the socket buffer,
			 * it's also the last record.
			 */
			sb->sb_lastrecord = m;
		}
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb);
}

/*
 * Append mbuf chain m to the last record in the socket buffer sb.  The
 * additional space associated with the mbuf chain is recorded in sb.  Empty
 * mbufs are discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}

/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
	SOCKBUF_LOCK_ASSERT(sb);

	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}

/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	struct mbuf *n = 0;
	u_long len = 0, mbcnt = 0;

	SOCKBUF_LOCK_ASSERT(sb);

	for (m = sb->sb_mb; m; m = n) {
	    n = m->m_nextpkt;
	    for (; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
			mbcnt += m->m_ext.ext_size;
	    }
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain begins a new record.
 */
void
sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == 0)
		return;
	/*
	 * Put the first mbuf on the queue.  Note this permits zero length
	 * records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	sb->sb_mbtail = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
	sbcompress(sb, m, m0);
}

/*
 * As above, except the mbuf chain begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Append address and data, and optionally, control (ancillary) data to the
 * receive queue of a socket.  If present, m0 must include a packet header
 * with total length.  Returns 0 if no space in sockbuf or insufficient
 * mbufs.
 */
int
sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	if (m0)
		space += m0->m_pkthdr.len;
	space += m_length(control, &n);

	if (space > sbspace(sb))
		return (0);
#if MSIZE <= 256
	if (asa->sa_len > MLEN)
		return (0);
#endif
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

/*
 * Append address and data, and optionally, control (ancillary) data to the
 * receive queue of a socket.  If present, m0 must include a packet header
 * with total length.  Returns 0 if no space in sockbuf or insufficient
 * mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}

int
sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
    struct mbuf *control)
{
	struct mbuf *m, *n, *mlast;
	int space;

	SOCKBUF_LOCK_ASSERT(sb);

	if (control == 0)
		panic("sbappendcontrol_locked");
	space = m_length(control, &n) + m_length(m0, NULL);

	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb);

	for (m = control; m->m_next; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendcontrol_locked(sb, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}

/*
 * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
 * (n).  If (n) is NULL, the buffer is presumed empty.
 *
 * When the data is compressed, mbufs in the chain may be handled in one of
 * three ways:
 *
 * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
 *     record boundary, and no change in data type).
 *
 * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
 *     an mbuf already in the socket buffer.  This can occur if an
 *     appropriate mbuf exists, there is room, and no merging of data types
 *     will occur.
 *
 * (3) The mbuf may be appended to the end of the existing mbuf chain.
 *
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;
	struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}

/*
 * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
 */
static void
sbflush_internal(struct sockbuf *sb)
{

	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop_internal(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
		panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
}

void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbflush_internal(sb);
}

void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
static void
sbdrop_internal(struct sockbuf *sb, int len)
{
	struct mbuf *m;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			if (sb->sb_sndptroff != 0)
				sb->sb_sndptroff -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		m = m_free(m);
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		m = m_free(m);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
	 * sb_lastrecord is up-to-date if we dropped part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbdrop_internal(sb, len);
}

void
sbdrop(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK(sb);
	sbdrop_locked(sb, len);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Maintain a pointer and offset pair into the socket buffer mbuf chain to
 * avoid traversal of the entire socket buffer for larger offsets.
 */
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
{
	struct mbuf *m, *ret;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
	KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
	KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));

	/*
	 * Is off below stored offset? Happens on retransmits.
	 * Just return, we can't help here.
	 */
	if (sb->sb_sndptroff > off) {
		*moff = off;
		return (sb->sb_mb);
	}

	/* Return closest mbuf in chain for current offset. */
	*moff = off - sb->sb_sndptroff;
	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;

	/* Advance by len to be as close as possible for the next transmit. */
	for (off = off - sb->sb_sndptroff + len - 1;
	     off > 0 && m != NULL && off >= m->m_len;
	     m = m->m_next) {
		sb->sb_sndptroff += m->m_len;
		off -= m->m_len;
	}
	if (off > 0 && m == NULL)
		panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret);
	sb->sb_sndptr = m;

	return (ret);
}
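
/*
 * Illustrative use of sbsndptr() (roughly what tcp_output() does when
 * building a segment of length "len" starting at offset "off" in so_snd):
 *
 *	mb = sbsndptr(&so->so_snd, off, len, &moff);
 *	m->m_next = m_copy(mb, moff, (int)len);
 *
 * so that retransmissions and large send buffers do not require walking
 * the whole mbuf chain from sb_mb on every transmit.
 */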

/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.
 */
void
sbdroprecord_locked(struct sockbuf *sb)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			m = m_free(m);
		} while (m);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Create a "control" mbuf containing the specified data with the specified
 * type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MCLBYTES)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE((u_int)size) > MLEN)
		m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
	else
		m = m_get(M_DONTWAIT, MT_CONTROL);
	if (m == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	m->m_len = 0;
	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
	    ("sbcreatecontrol: short mbuf"));
	if (p != NULL)
		(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}

/*
 * This does the same for socket buffers that sotoxsocket does for sockets:
 * generate a user-format data structure describing the socket buffer.  Note
 * that the xsockbuf structure, since it is always embedded in a socket, does
 * not include a self pointer nor a length.  We make this entry point public
 * in case some other mechanism needs it.
 */
1031167895Srwatsonsbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1032167895Srwatson{
1033169624Srwatson
1034167895Srwatson	xsb->sb_cc = sb->sb_cc;
1035167895Srwatson	xsb->sb_hiwat = sb->sb_hiwat;
1036167895Srwatson	xsb->sb_mbcnt = sb->sb_mbcnt;
1037179027Sgnn	xsb->sb_mcnt = sb->sb_mcnt;
1038179027Sgnn	xsb->sb_ccnt = sb->sb_ccnt;
1039167895Srwatson	xsb->sb_mbmax = sb->sb_mbmax;
1040167895Srwatson	xsb->sb_lowat = sb->sb_lowat;
1041167895Srwatson	xsb->sb_flags = sb->sb_flags;
1042167895Srwatson	xsb->sb_timeo = sb->sb_timeo;
1043167895Srwatson}
1044167895Srwatson
104523081Swollman/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
104623081Swollmanstatic int dummy;
104723081SwollmanSYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
1048160621SrwatsonSYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
1049110268Sharti    &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
1050110268ShartiSYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
1051228449Seadler    &sb_efficiency, 0, "Socket buffer size waste factor");
1052