uipc_sockbuf.c revision 174647
1139804Simp/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1990, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
291541Srgrimes *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
301541Srgrimes */
311541Srgrimes
32116182Sobrien#include <sys/cdefs.h>
33116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/uipc_sockbuf.c 174647 2007-12-16 06:21:20Z jeff $");
34116182Sobrien
3577598Sjesper#include "opt_param.h"
36101013Srwatson
371541Srgrimes#include <sys/param.h>
3895759Stanimura#include <sys/aio.h> /* for aio_swake proto */
3912041Swollman#include <sys/kernel.h>
4076166Smarkm#include <sys/lock.h>
411541Srgrimes#include <sys/mbuf.h>
4295759Stanimura#include <sys/mutex.h>
4376166Smarkm#include <sys/proc.h>
441541Srgrimes#include <sys/protosw.h>
4551381Sgreen#include <sys/resourcevar.h>
4695759Stanimura#include <sys/signalvar.h>
471541Srgrimes#include <sys/socket.h>
481541Srgrimes#include <sys/socketvar.h>
49169236Srwatson#include <sys/sx.h>
5012041Swollman#include <sys/sysctl.h>
511541Srgrimes
/*
 * Hook set by the AIO module when it is loaded so that the socket buffer
 * code can notify it of I/O readiness; NULL when AIO is not loaded.
 */
void	(*aio_swake)(struct socket *, struct sockbuf *);
5788633Salfred
581541Srgrimes/*
59160621Srwatson * Primitive routines for operating on socket buffers
601541Srgrimes */
611541Srgrimes
62101996Sdgu_long	sb_max = SB_MAX;
63172557Smohansu_long sb_max_adj =
64172557Smohans       SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
651541Srgrimes
6613267Swollmanstatic	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
6713267Swollman
68160915Srwatsonstatic void	sbdrop_internal(struct sockbuf *sb, int len);
69160915Srwatsonstatic void	sbflush_internal(struct sockbuf *sb);
70160875Srwatsonstatic void	sbrelease_internal(struct sockbuf *sb, struct socket *so);
71160875Srwatson
721541Srgrimes/*
73160915Srwatson * Socantsendmore indicates that no more data will be sent on the socket; it
74160915Srwatson * would normally be applied to a socket when the user informs the system
75160915Srwatson * that no more data is to be sent, by the protocol code (in case
76160915Srwatson * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
77160915Srwatson * received, and will normally be applied to the socket by a protocol when it
78160915Srwatson * detects that the peer will send no more data.  Data queued for reading in
79160915Srwatson * the socket may yet be read.
801541Srgrimes */
81130831Srwatsonvoid
82160915Srwatsonsocantsendmore_locked(struct socket *so)
83130831Srwatson{
841541Srgrimes
85130831Srwatson	SOCKBUF_LOCK_ASSERT(&so->so_snd);
86130831Srwatson
87130831Srwatson	so->so_snd.sb_state |= SBS_CANTSENDMORE;
88130831Srwatson	sowwakeup_locked(so);
89130831Srwatson	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
90130831Srwatson}
91130831Srwatson
921549Srgrimesvoid
93160915Srwatsonsocantsendmore(struct socket *so)
941541Srgrimes{
951541Srgrimes
96130831Srwatson	SOCKBUF_LOCK(&so->so_snd);
97130831Srwatson	socantsendmore_locked(so);
98130831Srwatson	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
991541Srgrimes}
1001541Srgrimes
1011549Srgrimesvoid
102160915Srwatsonsocantrcvmore_locked(struct socket *so)
1031541Srgrimes{
1041541Srgrimes
105130831Srwatson	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
106130831Srwatson
107130480Srwatson	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
108130831Srwatson	sorwakeup_locked(so);
109130831Srwatson	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
1101541Srgrimes}
1111541Srgrimes
112130831Srwatsonvoid
113160915Srwatsonsocantrcvmore(struct socket *so)
114130831Srwatson{
115130831Srwatson
116130831Srwatson	SOCKBUF_LOCK(&so->so_rcv);
117130831Srwatson	socantrcvmore_locked(so);
118130831Srwatson	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
119130831Srwatson}
120130831Srwatson
1211541Srgrimes/*
1221541Srgrimes * Wait for data to arrive at/drain from a socket buffer.
1231541Srgrimes */
1241549Srgrimesint
125160915Srwatsonsbwait(struct sockbuf *sb)
1261541Srgrimes{
1271541Srgrimes
128130705Srwatson	SOCKBUF_LOCK_ASSERT(sb);
129130705Srwatson
1301541Srgrimes	sb->sb_flags |= SB_WAIT;
131130705Srwatson	return (msleep(&sb->sb_cc, &sb->sb_mtx,
13212843Sbde	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
1331541Srgrimes	    sb->sb_timeo));
1341541Srgrimes}
1351541Srgrimes
1361549Srgrimesint
137169236Srwatsonsblock(struct sockbuf *sb, int flags)
1381541Srgrimes{
1391541Srgrimes
140169236Srwatson	if (flags == M_WAITOK) {
141170151Srwatson		if (sb->sb_flags & SB_NOINTR) {
142170151Srwatson			sx_xlock(&sb->sb_sx);
143170151Srwatson			return (0);
144170151Srwatson		}
145170151Srwatson		return (sx_xlock_sig(&sb->sb_sx));
146169236Srwatson	} else {
147169236Srwatson		if (sx_try_xlock(&sb->sb_sx) == 0)
148169236Srwatson			return (EWOULDBLOCK);
149169236Srwatson		return (0);
1501541Srgrimes	}
1511541Srgrimes}
1521541Srgrimes
153169236Srwatsonvoid
154169236Srwatsonsbunlock(struct sockbuf *sb)
155169236Srwatson{
156169236Srwatson
157169236Srwatson	sx_xunlock(&sb->sb_sx);
158169236Srwatson}
159169236Srwatson
1601541Srgrimes/*
161160915Srwatson * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
162160915Srwatson * via SIGIO if the socket has the SS_ASYNC flag set.
163130831Srwatson *
164130831Srwatson * Called with the socket buffer lock held; will release the lock by the end
165130831Srwatson * of the function.  This allows the caller to acquire the socket buffer lock
166130831Srwatson * while testing for the need for various sorts of wakeup and hold it through
167130831Srwatson * to the point where it's no longer required.  We currently hold the lock
168130831Srwatson * through calls out to other subsystems (with the exception of kqueue), and
169130831Srwatson * then release it to avoid lock order issues.  It's not clear that's
170130831Srwatson * correct.
1711541Srgrimes */
1721549Srgrimesvoid
173160915Srwatsonsowakeup(struct socket *so, struct sockbuf *sb)
1741541Srgrimes{
17595552Stanimura
176130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
177130831Srwatson
178122352Stanimura	selwakeuppri(&sb->sb_sel, PSOCK);
179174647Sjeff	if (!SEL_WAITING(&sb->sb_sel))
180174647Sjeff		sb->sb_flags &= ~SB_SEL;
1811541Srgrimes	if (sb->sb_flags & SB_WAIT) {
1821541Srgrimes		sb->sb_flags &= ~SB_WAIT;
18398998Salfred		wakeup(&sb->sb_cc);
1841541Srgrimes	}
185133741Sjmg	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
186130831Srwatson	SOCKBUF_UNLOCK(sb);
18797658Stanimura	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
18895883Salfred		pgsigio(&so->so_sigio, SIGIO, 0);
18997658Stanimura	if (sb->sb_flags & SB_UPCALL)
190111119Simp		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
19197658Stanimura	if (sb->sb_flags & SB_AIO)
19255943Sjasone		aio_swake(so, sb);
193130831Srwatson	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
1941541Srgrimes}
1951541Srgrimes
1961541Srgrimes/*
1971541Srgrimes * Socket buffer (struct sockbuf) utility routines.
1981541Srgrimes *
199160915Srwatson * Each socket contains two socket buffers: one for sending data and one for
200160915Srwatson * receiving data.  Each buffer contains a queue of mbufs, information about
201160915Srwatson * the number of mbufs and amount of data in the queue, and other fields
202160915Srwatson * allowing select() statements and notification on data availability to be
203160915Srwatson * implemented.
2041541Srgrimes *
205160915Srwatson * Data stored in a socket buffer is maintained as a list of records.  Each
206160915Srwatson * record is a list of mbufs chained together with the m_next field.  Records
207160915Srwatson * are chained together with the m_nextpkt field. The upper level routine
208160915Srwatson * soreceive() expects the following conventions to be observed when placing
209160915Srwatson * information in the receive buffer:
2101541Srgrimes *
211160915Srwatson * 1. If the protocol requires each message be preceded by the sender's name,
212160915Srwatson *    then a record containing that name must be present before any
213160915Srwatson *    associated data (mbuf's must be of type MT_SONAME).
214160915Srwatson * 2. If the protocol supports the exchange of ``access rights'' (really just
215160915Srwatson *    additional data associated with the message), and there are ``rights''
216160915Srwatson *    to be received, then a record containing this data should be present
217160915Srwatson *    (mbuf's must be of type MT_RIGHTS).
218160915Srwatson * 3. If a name or rights record exists, then it must be followed by a data
219160915Srwatson *    record, perhaps of zero length.
2201541Srgrimes *
2211541Srgrimes * Before using a new socket structure it is first necessary to reserve
2221541Srgrimes * buffer space to the socket, by calling sbreserve().  This should commit
2231541Srgrimes * some of the available buffer space in the system buffer pool for the
224160915Srwatson * socket (currently, it does nothing but enforce limits).  The space should
225160915Srwatson * be released by calling sbrelease() when the socket is destroyed.
2261541Srgrimes */
2271549Srgrimesint
228160915Srwatsonsoreserve(struct socket *so, u_long sndcc, u_long rcvcc)
2291541Srgrimes{
23083366Sjulian	struct thread *td = curthread;
2311541Srgrimes
232131006Srwatson	SOCKBUF_LOCK(&so->so_snd);
233131006Srwatson	SOCKBUF_LOCK(&so->so_rcv);
234131006Srwatson	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
2351541Srgrimes		goto bad;
236131006Srwatson	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
2371541Srgrimes		goto bad2;
2381541Srgrimes	if (so->so_rcv.sb_lowat == 0)
2391541Srgrimes		so->so_rcv.sb_lowat = 1;
2401541Srgrimes	if (so->so_snd.sb_lowat == 0)
2411541Srgrimes		so->so_snd.sb_lowat = MCLBYTES;
2421541Srgrimes	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
2431541Srgrimes		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
244131006Srwatson	SOCKBUF_UNLOCK(&so->so_rcv);
245130653Srwatson	SOCKBUF_UNLOCK(&so->so_snd);
2461541Srgrimes	return (0);
2471541Srgrimesbad2:
248131006Srwatson	sbrelease_locked(&so->so_snd, so);
2491541Srgrimesbad:
250131006Srwatson	SOCKBUF_UNLOCK(&so->so_rcv);
251131006Srwatson	SOCKBUF_UNLOCK(&so->so_snd);
2521541Srgrimes	return (ENOBUFS);
2531541Srgrimes}
2541541Srgrimes
255101996Sdgstatic int
256101996Sdgsysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
257101996Sdg{
258101996Sdg	int error = 0;
259162086Sjhb	u_long tmp_sb_max = sb_max;
260101996Sdg
261162086Sjhb	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
262101996Sdg	if (error || !req->newptr)
263101996Sdg		return (error);
264162086Sjhb	if (tmp_sb_max < MSIZE + MCLBYTES)
265101996Sdg		return (EINVAL);
266162086Sjhb	sb_max = tmp_sb_max;
267101996Sdg	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
268101996Sdg	return (0);
269101996Sdg}
270101996Sdg
2711541Srgrimes/*
272160915Srwatson * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
273160915Srwatson * become limiting if buffering efficiency is near the normal case.
2741541Srgrimes */
2751549Srgrimesint
276160915Srwatsonsbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
277160915Srwatson    struct thread *td)
2781541Srgrimes{
279125454Sjhb	rlim_t sbsize_limit;
28052070Sgreen
281131006Srwatson	SOCKBUF_LOCK_ASSERT(sb);
282131006Srwatson
28352070Sgreen	/*
284160915Srwatson	 * td will only be NULL when we're in an interrupt (e.g. in
285160915Srwatson	 * tcp_input()).
286160915Srwatson	 *
287160915Srwatson	 * XXXRW: This comment needs updating, as might the code.
28852070Sgreen	 */
289101996Sdg	if (cc > sb_max_adj)
2901541Srgrimes		return (0);
291125454Sjhb	if (td != NULL) {
292125454Sjhb		PROC_LOCK(td->td_proc);
293125454Sjhb		sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
294125454Sjhb		PROC_UNLOCK(td->td_proc);
295125454Sjhb	} else
296125454Sjhb		sbsize_limit = RLIM_INFINITY;
29765495Struckman	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
298125454Sjhb	    sbsize_limit))
29952070Sgreen		return (0);
30013267Swollman	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
3011541Srgrimes	if (sb->sb_lowat > sb->sb_hiwat)
3021541Srgrimes		sb->sb_lowat = sb->sb_hiwat;
3031541Srgrimes	return (1);
3041541Srgrimes}
3051541Srgrimes
306131006Srwatsonint
307160915Srwatsonsbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
308160915Srwatson    struct thread *td)
309131006Srwatson{
310131006Srwatson	int error;
311131006Srwatson
312131006Srwatson	SOCKBUF_LOCK(sb);
313131006Srwatson	error = sbreserve_locked(sb, cc, so, td);
314131006Srwatson	SOCKBUF_UNLOCK(sb);
315131006Srwatson	return (error);
316131006Srwatson}
317131006Srwatson
3181541Srgrimes/*
3191541Srgrimes * Free mbufs held by a socket, and reserved mbuf space.
3201541Srgrimes */
321160875Srwatsonstatic void
322160915Srwatsonsbrelease_internal(struct sockbuf *sb, struct socket *so)
323160875Srwatson{
324160875Srwatson
325160875Srwatson	sbflush_internal(sb);
326160875Srwatson	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
327160875Srwatson	    RLIM_INFINITY);
328160875Srwatson	sb->sb_mbmax = 0;
329160875Srwatson}
330160875Srwatson
/* Release buffer resources; caller holds the socket buffer lock. */
void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbrelease_internal(sb, so);
}
3391541Srgrimes
/* Unlocked wrapper around sbrelease_locked(). */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}
348160875Srwatson
/*
 * Tear down a socket buffer during socket destruction; no lock is taken,
 * presumably because no other references remain -- TODO confirm callers.
 */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
}
355160875Srwatson
3561541Srgrimes/*
357160915Srwatson * Routines to add and remove data from an mbuf queue.
3581541Srgrimes *
359160915Srwatson * The routines sbappend() or sbappendrecord() are normally called to append
360160915Srwatson * new mbufs to a socket buffer, after checking that adequate space is
361160915Srwatson * available, comparing the function sbspace() with the amount of data to be
362160915Srwatson * added.  sbappendrecord() differs from sbappend() in that data supplied is
363160915Srwatson * treated as the beginning of a new record.  To place a sender's address,
364160915Srwatson * optional access rights, and data in a socket receive buffer,
365160915Srwatson * sbappendaddr() should be used.  To place access rights and data in a
366160915Srwatson * socket receive buffer, sbappendrights() should be used.  In either case,
367160915Srwatson * the new data begins a new record.  Note that unlike sbappend() and
368160915Srwatson * sbappendrecord(), these routines check for the caller that there will be
369160915Srwatson * enough space to store the data.  Each fails if there is not enough space,
370160915Srwatson * or if it cannot find mbufs to store additional information in.
3711541Srgrimes *
372160915Srwatson * Reliable protocols may use the socket send buffer to hold data awaiting
373160915Srwatson * acknowledgement.  Data is normally copied from a socket send buffer in a
374160915Srwatson * protocol with m_copy for output to a peer, and then removing the data from
375160915Srwatson * the socket buffer with sbdrop() or sbdroprecord() when the data is
376160915Srwatson * acknowledged by the peer.
3771541Srgrimes */
378121628Ssam#ifdef SOCKBUF_DEBUG
379121628Ssamvoid
380121628Ssamsblastrecordchk(struct sockbuf *sb, const char *file, int line)
381121628Ssam{
382121628Ssam	struct mbuf *m = sb->sb_mb;
383121628Ssam
384130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
385130831Srwatson
386121628Ssam	while (m && m->m_nextpkt)
387121628Ssam		m = m->m_nextpkt;
388121628Ssam
389121628Ssam	if (m != sb->sb_lastrecord) {
390121628Ssam		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
391121628Ssam			__func__, sb->sb_mb, sb->sb_lastrecord, m);
392121628Ssam		printf("packet chain:\n");
393121628Ssam		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
394121628Ssam			printf("\t%p\n", m);
395121628Ssam		panic("%s from %s:%u", __func__, file, line);
396121628Ssam	}
397121628Ssam}
398121628Ssam
399121628Ssamvoid
400121628Ssamsblastmbufchk(struct sockbuf *sb, const char *file, int line)
401121628Ssam{
402121628Ssam	struct mbuf *m = sb->sb_mb;
403121628Ssam	struct mbuf *n;
404121628Ssam
405130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
406130831Srwatson
407121628Ssam	while (m && m->m_nextpkt)
408121628Ssam		m = m->m_nextpkt;
409121628Ssam
410121628Ssam	while (m && m->m_next)
411121628Ssam		m = m->m_next;
412121628Ssam
413121628Ssam	if (m != sb->sb_mbtail) {
414121628Ssam		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
415121628Ssam			__func__, sb->sb_mb, sb->sb_mbtail, m);
416121628Ssam		printf("packet tree:\n");
417121628Ssam		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
418121628Ssam			printf("\t");
419121628Ssam			for (n = m; n != NULL; n = n->m_next)
420121628Ssam				printf("%p ", n);
421121628Ssam			printf("\n");
422121628Ssam		}
423121628Ssam		panic("%s from %s:%u", __func__, file, line);
424121628Ssam	}
425121628Ssam}
426121628Ssam#endif /* SOCKBUF_DEBUG */
427121628Ssam
/*
 * Link record m0 in as the new last record of sb, maintaining both the
 * sb_mb head pointer (when the buffer was empty) and sb_lastrecord.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
436121628Ssam
4371541Srgrimes/*
438160915Srwatson * Append mbuf chain m to the last record in the socket buffer sb.  The
439160915Srwatson * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
440160915Srwatson * are discarded and mbufs are compacted where possible.
4411541Srgrimes */
4421549Srgrimesvoid
443160915Srwatsonsbappend_locked(struct sockbuf *sb, struct mbuf *m)
4441541Srgrimes{
445160915Srwatson	struct mbuf *n;
4461541Srgrimes
447130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
448130831Srwatson
4491541Srgrimes	if (m == 0)
4501541Srgrimes		return;
451130831Srwatson
452121628Ssam	SBLASTRECORDCHK(sb);
4533308Sphk	n = sb->sb_mb;
4543308Sphk	if (n) {
4551541Srgrimes		while (n->m_nextpkt)
4561541Srgrimes			n = n->m_nextpkt;
4571541Srgrimes		do {
4581541Srgrimes			if (n->m_flags & M_EOR) {
459130831Srwatson				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
4601541Srgrimes				return;
4611541Srgrimes			}
4621541Srgrimes		} while (n->m_next && (n = n->m_next));
463121628Ssam	} else {
464121628Ssam		/*
465121628Ssam		 * XXX Would like to simply use sb_mbtail here, but
466121628Ssam		 * XXX I need to verify that I won't miss an EOR that
467121628Ssam		 * XXX way.
468121628Ssam		 */
469121628Ssam		if ((n = sb->sb_lastrecord) != NULL) {
470121628Ssam			do {
471121628Ssam				if (n->m_flags & M_EOR) {
472130831Srwatson					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
473121628Ssam					return;
474121628Ssam				}
475121628Ssam			} while (n->m_next && (n = n->m_next));
476121628Ssam		} else {
477121628Ssam			/*
478121628Ssam			 * If this is the first record in the socket buffer,
479121628Ssam			 * it's also the last record.
480121628Ssam			 */
481121628Ssam			sb->sb_lastrecord = m;
482121628Ssam		}
4831541Srgrimes	}
4841541Srgrimes	sbcompress(sb, m, n);
485121628Ssam	SBLASTRECORDCHK(sb);
4861541Srgrimes}
4871541Srgrimes
/*
 * Unlocked wrapper: append mbuf chain m to the last record in the socket
 * buffer sb, taking and releasing the buffer lock around the operation.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
501130831Srwatson
502130831Srwatson/*
503160915Srwatson * This version of sbappend() should only be used when the caller absolutely
504160915Srwatson * knows that there will never be more than one record in the socket buffer,
505160915Srwatson * that is, a stream protocol (such as TCP).
506121628Ssam */
507121628Ssamvoid
508130831Srwatsonsbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
509121628Ssam{
510130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
511121628Ssam
512121628Ssam	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
513121628Ssam	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
514121628Ssam
515121628Ssam	SBLASTMBUFCHK(sb);
516121628Ssam
517121628Ssam	sbcompress(sb, m, sb->sb_mbtail);
518121628Ssam
519121628Ssam	sb->sb_lastrecord = sb->sb_mb;
520121628Ssam	SBLASTRECORDCHK(sb);
521121628Ssam}
522121628Ssam
/*
 * Unlocked wrapper for sbappendstream_locked(); stream (single-record)
 * socket buffers only.
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
536130831Srwatson
#ifdef SOCKBUF_DEBUG
/*
 * Debug check: recompute the byte count and mbuf-space count by walking
 * every mbuf of every record, and panic if they disagree with the cached
 * sb_cc/sb_mbcnt counters.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	struct mbuf *n = 0;
	u_long len = 0, mbcnt = 0;

	SOCKBUF_LOCK_ASSERT(sb);

	for (m = sb->sb_mb; m; m = n) {
	    n = m->m_nextpkt;
	    for (; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
			mbcnt += m->m_ext.ext_size;
	    }
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		/* len and mbcnt are u_long: use %lu, not %ld. */
		printf("cc %lu != %u || mbcnt %lu != %u\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif
5631541Srgrimes
5641541Srgrimes/*
565160915Srwatson * As above, except the mbuf chain begins a new record.
5661541Srgrimes */
5671549Srgrimesvoid
568160915Srwatsonsbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
5691541Srgrimes{
570160915Srwatson	struct mbuf *m;
5711541Srgrimes
572130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
573130831Srwatson
5741541Srgrimes	if (m0 == 0)
5751541Srgrimes		return;
5763308Sphk	m = sb->sb_mb;
5773308Sphk	if (m)
5781541Srgrimes		while (m->m_nextpkt)
5791541Srgrimes			m = m->m_nextpkt;
5801541Srgrimes	/*
581160915Srwatson	 * Put the first mbuf on the queue.  Note this permits zero length
582160915Srwatson	 * records.
5831541Srgrimes	 */
5841541Srgrimes	sballoc(sb, m0);
585121628Ssam	SBLASTRECORDCHK(sb);
586121628Ssam	SBLINKRECORD(sb, m0);
5871541Srgrimes	if (m)
5881541Srgrimes		m->m_nextpkt = m0;
5891541Srgrimes	else
5901541Srgrimes		sb->sb_mb = m0;
5911541Srgrimes	m = m0->m_next;
5921541Srgrimes	m0->m_next = 0;
5931541Srgrimes	if (m && (m0->m_flags & M_EOR)) {
5941541Srgrimes		m0->m_flags &= ~M_EOR;
5951541Srgrimes		m->m_flags |= M_EOR;
5961541Srgrimes	}
5971541Srgrimes	sbcompress(sb, m, m0);
5981541Srgrimes}
5991541Srgrimes
/*
 * Unlocked wrapper: append m0 as a new record, taking the buffer lock.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}
611130831Srwatson
612130831Srwatson/*
613160915Srwatson * Append address and data, and optionally, control (ancillary) data to the
614160915Srwatson * receive queue of a socket.  If present, m0 must include a packet header
615160915Srwatson * with total length.  Returns 0 if no space in sockbuf or insufficient
616160915Srwatson * mbufs.
6171541Srgrimes */
6181549Srgrimesint
619160915Srwatsonsbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
620160915Srwatson    struct mbuf *m0, struct mbuf *control)
6211541Srgrimes{
622121628Ssam	struct mbuf *m, *n, *nlast;
6231541Srgrimes	int space = asa->sa_len;
6241541Srgrimes
625130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
626130831Srwatson
62778945Sjlemon	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
628130831Srwatson		panic("sbappendaddr_locked");
6291541Srgrimes	if (m0)
6301541Srgrimes		space += m0->m_pkthdr.len;
631103554Sphk	space += m_length(control, &n);
632130831Srwatson
6331541Srgrimes	if (space > sbspace(sb))
6341541Srgrimes		return (0);
635118045Sscottl#if MSIZE <= 256
6361541Srgrimes	if (asa->sa_len > MLEN)
6371541Srgrimes		return (0);
638118045Sscottl#endif
639111119Simp	MGET(m, M_DONTWAIT, MT_SONAME);
6401541Srgrimes	if (m == 0)
6411541Srgrimes		return (0);
6421541Srgrimes	m->m_len = asa->sa_len;
64398998Salfred	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
6441541Srgrimes	if (n)
6451541Srgrimes		n->m_next = m0;		/* concatenate data to control */
6461541Srgrimes	else
6471541Srgrimes		control = m0;
6481541Srgrimes	m->m_next = control;
649121628Ssam	for (n = m; n->m_next != NULL; n = n->m_next)
6501541Srgrimes		sballoc(sb, n);
651121628Ssam	sballoc(sb, n);
652121628Ssam	nlast = n;
653121628Ssam	SBLINKRECORD(sb, m);
654121628Ssam
655121628Ssam	sb->sb_mbtail = nlast;
656121628Ssam	SBLASTMBUFCHK(sb);
657121628Ssam
658121628Ssam	SBLASTRECORDCHK(sb);
6591541Srgrimes	return (1);
6601541Srgrimes}
6611541Srgrimes
/*
 * Unlocked wrapper for sbappendaddr_locked(): append address, data, and
 * optional control data as a new record.  Returns 0 on failure, 1 on
 * success.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}
679130831Srwatson
680130831Srwatsonint
681160915Srwatsonsbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
682160915Srwatson    struct mbuf *control)
6831541Srgrimes{
684121628Ssam	struct mbuf *m, *n, *mlast;
685103554Sphk	int space;
6861541Srgrimes
687130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
688130831Srwatson
6891541Srgrimes	if (control == 0)
690130831Srwatson		panic("sbappendcontrol_locked");
691103554Sphk	space = m_length(control, &n) + m_length(m0, NULL);
692130831Srwatson
6931541Srgrimes	if (space > sbspace(sb))
6941541Srgrimes		return (0);
6951541Srgrimes	n->m_next = m0;			/* concatenate data to control */
696121628Ssam
697121628Ssam	SBLASTRECORDCHK(sb);
698121628Ssam
699121628Ssam	for (m = control; m->m_next; m = m->m_next)
7001541Srgrimes		sballoc(sb, m);
701121628Ssam	sballoc(sb, m);
702121628Ssam	mlast = m;
703121628Ssam	SBLINKRECORD(sb, control);
704121628Ssam
705121628Ssam	sb->sb_mbtail = mlast;
706121628Ssam	SBLASTMBUFCHK(sb);
707121628Ssam
708121628Ssam	SBLASTRECORDCHK(sb);
7091541Srgrimes	return (1);
7101541Srgrimes}
7111541Srgrimes
/* Unlocked wrapper around sbappendcontrol_locked(). */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendcontrol_locked(sb, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}
722130831Srwatson
7231541Srgrimes/*
/*
 * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
 * (n).  If (n) is NULL, the buffer is presumed empty.
 *
 * When the data is compressed, mbufs in the chain may be handled in one of
 * three ways:
 *
 * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
 *     record boundary, and no change in data type).
 *
 * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
 *     an mbuf already in the socket buffer.  This can occur if an
 *     appropriate mbuf exists, there is room, and no merging of data types
 *     will occur.
 *
 * (3) The mbuf may be appended to the end of the existing mbuf chain.
 *
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;
	struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		/* Accumulate M_EOR so it can be applied to the last mbuf kept. */
		eor |= m->m_flags & M_EOR;
		/*
		 * Case (1): drop an empty mbuf -- but only if dropping it
		 * loses no information: either no record boundary is
		 * pending, or a successor mbuf of the same type exists
		 * (next in the chain, else the buffer tail n) to carry it.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Case (2): coalesce small mbufs into the trailing space of
		 * the buffer's last mbuf (n), provided n is writable, is not
		 * an end-of-record, has room, and the data types match.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Case (3): link the mbuf onto the end of the chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		/* Clear M_EOR here; it is re-applied below to the final mbuf. */
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}
7941541Srgrimes
7951541Srgrimes/*
796160915Srwatson * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
7971541Srgrimes */
798160875Srwatsonstatic void
799160915Srwatsonsbflush_internal(struct sockbuf *sb)
8001541Srgrimes{
8011541Srgrimes
80251757Spb	while (sb->sb_mbcnt) {
80351757Spb		/*
80451757Spb		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
80551757Spb		 * we would loop forever. Panic instead.
80651757Spb		 */
80751757Spb		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
80851757Spb			break;
809160875Srwatson		sbdrop_internal(sb, (int)sb->sb_cc);
81051757Spb	}
81140913Sfenner	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
812160915Srwatson		panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
813160915Srwatson		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
8141541Srgrimes}
8151541Srgrimes
/*
 * Flush a socket buffer whose lock the caller already holds.
 */
void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbflush_internal(sb);
}
823160875Srwatson
/*
 * Flush a socket buffer, acquiring and releasing its lock around the work.
 */
void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
832130831Srwatson
/*
 * Drop data from (the front of) a sockbuf.
 */
static void
sbdrop_internal(struct sockbuf *sb, int len)
{
	struct mbuf *m;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* End of record reached; continue into the next one. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf in place. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			/* Keep the cached send-pointer offset consistent. */
			if (sb->sb_sndptroff != 0)
				sb->sb_sndptroff -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		/* Whole mbuf consumed: release it and its accounting. */
		len -= m->m_len;
		sbfree(sb, m);
		m = m_free(m);
	}
	/* Discard any empty mbufs now at the head of the record. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		m = m_free(m);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
	 * sb_lastrecord is up-to-date if we dropped part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}
}
8861541Srgrimes
/*
 * Drop data from (the front of) a sockbuf.  Caller holds the sockbuf lock.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbdrop_internal(sb, len);
}
898160875Srwatson
/*
 * Drop data from (the front of) a sockbuf, acquiring and releasing its lock.
 */
void
sbdrop(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK(sb);
	sbdrop_locked(sb, len);
	SOCKBUF_UNLOCK(sb);
}
907130831Srwatson
/*
 * Maintain a pointer and offset pair into the socket buffer mbuf chain to
 * avoid traversal of the entire socket buffer for larger offsets.
 */
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
{
	struct mbuf *m, *ret;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
	KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
	KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));

	/*
	 * Is off below stored offset? Happens on retransmits.
	 * Just return, we can't help here.
	 */
	if (sb->sb_sndptroff > off) {
		*moff = off;
		return (sb->sb_mb);
	}

	/* Return closest mbuf in chain for current offset. */
	*moff = off - sb->sb_sndptroff;
	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;

	/*
	 * Advance by len to be as close as possible for the next transmit.
	 *
	 * NOTE(review): off is u_int, so the "+ len - 1" below wraps to a
	 * huge value when len == 0 and off == sb_sndptroff -- presumably
	 * callers never pass len == 0; verify against callers.  The loop
	 * also does not guard m against going NULL; it relies on the
	 * "off + len <= sb_cc" assertion above holding -- TODO confirm.
	 */
	for (off = off - sb->sb_sndptroff + len - 1;
	     off > 0 && off >= m->m_len;
	     m = m->m_next) {
		sb->sb_sndptroff += m->m_len;
		off -= m->m_len;
	}
	sb->sb_sndptr = m;

	return (ret);
}
945167715Sandre
946167715Sandre/*
947160915Srwatson * Drop a record off the front of a sockbuf and move the next record to the
948160915Srwatson * front.
9491541Srgrimes */
9501549Srgrimesvoid
951160915Srwatsonsbdroprecord_locked(struct sockbuf *sb)
9521541Srgrimes{
953160915Srwatson	struct mbuf *m;
9541541Srgrimes
955130831Srwatson	SOCKBUF_LOCK_ASSERT(sb);
956130831Srwatson
9571541Srgrimes	m = sb->sb_mb;
9581541Srgrimes	if (m) {
9591541Srgrimes		sb->sb_mb = m->m_nextpkt;
9601541Srgrimes		do {
9611541Srgrimes			sbfree(sb, m);
96290227Sdillon			m = m_free(m);
9633308Sphk		} while (m);
9641541Srgrimes	}
965121628Ssam	SB_EMPTY_FIXUP(sb);
9661541Srgrimes}
96717047Swollman
/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.  Acquires and releases the sockbuf lock.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
980130831Srwatson
981167895Srwatson/*
982167902Srwatson * Create a "control" mbuf containing the specified data with the specified
983167902Srwatson * type for presentation on a socket buffer.
984167895Srwatson */
985167895Srwatsonstruct mbuf *
986169624Srwatsonsbcreatecontrol(caddr_t p, int size, int type, int level)
987167895Srwatson{
988169624Srwatson	struct cmsghdr *cp;
989167895Srwatson	struct mbuf *m;
990167895Srwatson
991167895Srwatson	if (CMSG_SPACE((u_int)size) > MCLBYTES)
992167895Srwatson		return ((struct mbuf *) NULL);
993167895Srwatson	if (CMSG_SPACE((u_int)size) > MLEN)
994167895Srwatson		m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
995167895Srwatson	else
996167895Srwatson		m = m_get(M_DONTWAIT, MT_CONTROL);
997167895Srwatson	if (m == NULL)
998167895Srwatson		return ((struct mbuf *) NULL);
999167895Srwatson	cp = mtod(m, struct cmsghdr *);
1000167895Srwatson	m->m_len = 0;
1001167895Srwatson	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
1002167895Srwatson	    ("sbcreatecontrol: short mbuf"));
1003167895Srwatson	if (p != NULL)
1004167895Srwatson		(void)memcpy(CMSG_DATA(cp), p, size);
1005167895Srwatson	m->m_len = CMSG_SPACE(size);
1006167895Srwatson	cp->cmsg_len = CMSG_LEN(size);
1007167895Srwatson	cp->cmsg_level = level;
1008167895Srwatson	cp->cmsg_type = type;
1009167895Srwatson	return (m);
1010167895Srwatson}
1011167895Srwatson
/*
 * This does the same for socket buffers that sotoxsocket does for sockets:
 * generate an user-format data structure describing the socket buffer.  Note
 * that the xsockbuf structure, since it is always embedded in a socket, does
 * not include a self pointer nor a length.  We make this entry point public
 * in case some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{

	/* Straight field-by-field export; no locking is done here. */
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = sb->sb_timeo;
}
1031167895Srwatson
/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
/* kern.ipc.maxsockbuf: validated through sysctl_handle_sb_max. */
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
    &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
/* kern.ipc.sockbuf_waste_factor: tunes sb_efficiency (see sbreserve). */
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
1039