uipc_mbuf.c revision 297227
1139804Simp/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1991, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
291541Srgrimes *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
301541Srgrimes */
311541Srgrimes
32116182Sobrien#include <sys/cdefs.h>
33116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 297227 2016-03-24 08:26:06Z gnn $");
34116182Sobrien
3577572Sobrien#include "opt_param.h"
36113490Ssilby#include "opt_mbuf_stress_test.h"
37178674Sjulian#include "opt_mbuf_profiling.h"
38101007Srwatson
391541Srgrimes#include <sys/param.h>
401541Srgrimes#include <sys/systm.h>
4176166Smarkm#include <sys/kernel.h>
42125296Ssilby#include <sys/limits.h>
4376166Smarkm#include <sys/lock.h>
4432036Sbde#include <sys/malloc.h>
451541Srgrimes#include <sys/mbuf.h>
4623081Swollman#include <sys/sysctl.h>
471541Srgrimes#include <sys/domain.h>
481541Srgrimes#include <sys/protosw.h>
49125296Ssilby#include <sys/uio.h>
50297188Sgnn#include <sys/sdt.h>
5176166Smarkm
/*
 * Statically defined tracing (SDT) probes for the mbuf allocation and
 * free paths.  All probes live in the "sdt" provider with empty module
 * and function fields.  For the _XLATE variants each probe argument is
 * described by a pair of type strings (presumably the native type followed
 * by the translated type -- confirm against sys/sdt.h).
 */
SDT_PROBE_DEFINE5_XLATE(sdt, , , m__init,
    "struct mbuf *", "mbufinfo_t *",
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "uint32_t", "uint32_t",
    "uint32_t", "uint32_t");

SDT_PROBE_DEFINE3_XLATE(sdt, , , m__gethdr,
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE3_XLATE(sdt, , , m__get,
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE4_XLATE(sdt, , , m__getcl,
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "uint32_t", "uint32_t",
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE3_XLATE(sdt, , , m__clget,
    "struct mbuf *", "mbufinfo_t *",
    "uint32_t", "uint32_t",
    "uint32_t", "uint32_t");

SDT_PROBE_DEFINE4_XLATE(sdt, , , m__cljget,
    "struct mbuf *", "mbufinfo_t *",
    "uint32_t", "uint32_t",
    "uint32_t", "uint32_t",
    "void*", "void*");

/* The only probe here that is defined without arguments. */
SDT_PROBE_DEFINE(sdt, , , m__cljset);

SDT_PROBE_DEFINE1_XLATE(sdt, , , m__free,
        "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE1_XLATE(sdt, , , m__freem,
    "struct mbuf *", "mbufinfo_t *");
93297188Sgnn
94295547Sglebius#include <security/mac/mac_framework.h>
95295547Sglebius
/*
 * Header-size limits, exported read-only through sysctl under kern.ipc.
 */
int	max_linkhdr;		/* size of largest link layer header */
int	max_protohdr;		/* size of largest protocol layer header */
int	max_hdr;		/* size of largest link plus protocol header */
int	max_datalen;		/* minimum space left in mbuf after max_hdr */
#ifdef MBUF_STRESS_TEST
/* Defragmentation counters, only built with MBUF_STRESS_TEST. */
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
int	m_defragrandomfailures;
#endif
1071541Srgrimes
/*
 * sysctl(8) exported objects
 *
 * The four header-size limits are published read-only under kern.ipc
 * with fixed KIPC_* identifiers.
 */
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD,
	   &max_linkhdr, 0, "Size of largest link layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD,
	   &max_protohdr, 0, "Size of largest protocol layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD,
	   &max_hdr, 0, "Size of largest link plus protocol header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD,
	   &max_datalen, 0, "Minimum space left in mbuf after max_hdr");
#ifdef MBUF_STRESS_TEST
/*
 * Stress-test counters get dynamically assigned OIDs and carry no
 * description string.  Only m_defragrandomfailures is writable.
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif
13175112Sbmilekic
/*
 * Ensure the correct size of various mbuf parameters.  It could be off due
 * to compiler-induced padding and alignment artifacts.
 */
CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN);
CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN);

/*
 * mbuf data storage should be 64-bit aligned regardless of architectural
 * pointer size; check this is the case with and without a packet header.
 */
CTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0);
CTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0);

/*
 * While the specific values here don't matter too much (i.e., +/- a few
 * words), we do want to ensure that changes to these values are carefully
 * reasoned about and properly documented.  This is especially the case as
 * network-protocol and device-driver modules encode these layouts, and must
 * be recompiled if the structures change.  Check these values at compile time
 * against the ones documented in comments in mbuf.h.
 *
 * NB: Possibly they should be documented there via #define's and not just
 * comments.
 */
#if defined(__LP64__)
/* Expected layout when __LP64__ is defined. */
CTASSERT(offsetof(struct mbuf, m_dat) == 32);
CTASSERT(sizeof(struct pkthdr) == 56);
CTASSERT(sizeof(struct m_ext) == 48);
#else
/* Expected layout when __LP64__ is not defined. */
CTASSERT(offsetof(struct mbuf, m_dat) == 24);
CTASSERT(sizeof(struct pkthdr) == 48);
CTASSERT(sizeof(struct m_ext) == 28);
#endif

/*
 * Assert that the queue(3) macros produce code of the same size as an old
 * plain pointer does.
 */
#ifdef INVARIANTS
/* Dummy object that exists only so sizeof can be applied to its members. */
static struct mbuf m_assertbuf;
CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next));
CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next));
CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt));
CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt));
#endif
178278914Sglebius
179278914Sglebius/*
180218909Sbrucec * Attach the cluster from *m to *n, set up m_ext in *n
181151976Sandre * and bump the refcount of the cluster.
182151976Sandre */
183289276Shirenvoid
184296242Sglebiusmb_dupcl(struct mbuf *n, struct mbuf *m)
185151976Sandre{
186296242Sglebius	volatile u_int *refcnt;
187151976Sandre
188268535Sglebius	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
189268535Sglebius	KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));
190268535Sglebius
191296242Sglebius	n->m_ext = m->m_ext;
192296242Sglebius	n->m_flags |= M_EXT;
193296242Sglebius	n->m_flags |= m->m_flags & M_RDONLY;
194296242Sglebius
195296242Sglebius	/* See if this is the mbuf that holds the embedded refcount. */
196296242Sglebius	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
197296242Sglebius		refcnt = n->m_ext.ext_cnt = &m->m_ext.ext_count;
198296242Sglebius		n->m_ext.ext_flags &= ~EXT_FLAG_EMBREF;
199296242Sglebius	} else {
200268535Sglebius		KASSERT(m->m_ext.ext_cnt != NULL,
201268535Sglebius		    ("%s: no refcounting pointer on %p", __func__, m));
202296242Sglebius		refcnt = m->m_ext.ext_cnt;
203268535Sglebius	}
204268535Sglebius
205296242Sglebius	if (*refcnt == 1)
206296242Sglebius		*refcnt += 1;
207296242Sglebius	else
208296242Sglebius		atomic_add_int(refcnt, 1);
209151976Sandre}
210151976Sandre
211284961Snpvoid
212284961Snpm_demote_pkthdr(struct mbuf *m)
213284961Snp{
214284961Snp
215284961Snp	M_ASSERTPKTHDR(m);
216284961Snp
217284961Snp	m_tag_delete_chain(m, NULL);
218284961Snp	m->m_flags &= ~M_PKTHDR;
219284961Snp	bzero(&m->m_pkthdr, sizeof(struct pkthdr));
220284961Snp}
221284961Snp
222151976Sandre/*
223149598Sandre * Clean up mbuf (chain) from any tags and packet headers.
224149647Sandre * If "all" is set then the first mbuf in the chain will be
225149647Sandre * cleaned too.
226149598Sandre */
227149598Sandrevoid
228275329Sglebiusm_demote(struct mbuf *m0, int all, int flags)
229149598Sandre{
230149598Sandre	struct mbuf *m;
231149598Sandre
232149598Sandre	for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
233271122Sglebius		KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p",
234271122Sglebius		    __func__, m, m0));
235284961Snp		if (m->m_flags & M_PKTHDR)
236284961Snp			m_demote_pkthdr(m);
237275329Sglebius		m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
238149598Sandre	}
239149598Sandre}
240149598Sandre
241149598Sandre/*
242149648Sandre * Sanity checks on mbuf (chain) for use in KASSERT() and general
243149648Sandre * debugging.
244149648Sandre * Returns 0 or panics when bad and 1 on all tests passed.
245149648Sandre * Sanitize, 0 to run M_SANITY_ACTION, 1 to garble things so they
246149648Sandre * blow up later.
247149599Sandre */
248149599Sandreint
249149599Sandrem_sanity(struct mbuf *m0, int sanitize)
250149599Sandre{
251149599Sandre	struct mbuf *m;
252149599Sandre	caddr_t a, b;
253149599Sandre	int pktlen = 0;
254149599Sandre
255168734Skmacy#ifdef INVARIANTS
256168734Skmacy#define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m)
257266876Sglebius#else
258168734Skmacy#define	M_SANITY_ACTION(s)	printf("mbuf %p: " s, m)
259168734Skmacy#endif
260149599Sandre
261149648Sandre	for (m = m0; m != NULL; m = m->m_next) {
262149599Sandre		/*
263149599Sandre		 * Basic pointer checks.  If any of these fails then some
264149599Sandre		 * unrelated kernel memory before or after us is trashed.
265149599Sandre		 * No way to recover from that.
266149599Sandre		 */
267276818Srwatson		a = M_START(m);
268276818Srwatson		b = a + M_SIZE(m);
269149599Sandre		if ((caddr_t)m->m_data < a)
270149599Sandre			M_SANITY_ACTION("m_data outside mbuf data range left");
271149599Sandre		if ((caddr_t)m->m_data > b)
272149599Sandre			M_SANITY_ACTION("m_data outside mbuf data range right");
273149599Sandre		if ((caddr_t)m->m_data + m->m_len > b)
274149599Sandre			M_SANITY_ACTION("m_data + m_len exeeds mbuf space");
275149599Sandre
276149599Sandre		/* m->m_nextpkt may only be set on first mbuf in chain. */
277149648Sandre		if (m != m0 && m->m_nextpkt != NULL) {
278149599Sandre			if (sanitize) {
279149599Sandre				m_freem(m->m_nextpkt);
280149599Sandre				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
281149599Sandre			} else
282149599Sandre				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
283149599Sandre		}
284149599Sandre
285149599Sandre		/* packet length (not mbuf length!) calculation */
286149599Sandre		if (m0->m_flags & M_PKTHDR)
287149599Sandre			pktlen += m->m_len;
288149599Sandre
289149599Sandre		/* m_tags may only be attached to first mbuf in chain. */
290149599Sandre		if (m != m0 && m->m_flags & M_PKTHDR &&
291149599Sandre		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
292149599Sandre			if (sanitize) {
293149599Sandre				m_tag_delete_chain(m, NULL);
294149599Sandre				/* put in 0xDEADC0DE perhaps? */
295149648Sandre			} else
296149599Sandre				M_SANITY_ACTION("m_tags on in-chain mbuf");
297149599Sandre		}
298149599Sandre
299149599Sandre		/* M_PKTHDR may only be set on first mbuf in chain */
300149599Sandre		if (m != m0 && m->m_flags & M_PKTHDR) {
301149599Sandre			if (sanitize) {
302149599Sandre				bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
303149599Sandre				m->m_flags &= ~M_PKTHDR;
304149599Sandre				/* put in 0xDEADCODE and leave hdr flag in */
305149599Sandre			} else
306149599Sandre				M_SANITY_ACTION("M_PKTHDR on in-chain mbuf");
307149599Sandre		}
308149599Sandre	}
309149648Sandre	m = m0;
310149648Sandre	if (pktlen && pktlen != m->m_pkthdr.len) {
311149599Sandre		if (sanitize)
312149648Sandre			m->m_pkthdr.len = 0;
313149599Sandre		else
314149599Sandre			M_SANITY_ACTION("m_pkthdr.len != mbuf chain length");
315149599Sandre	}
316149648Sandre	return 1;
317149648Sandre
318149599Sandre#undef	M_SANITY_ACTION
319149599Sandre}
320149599Sandre
321295547Sglebius/*
322295547Sglebius * Non-inlined part of m_init().
323295547Sglebius */
324295547Sglebiusint
325295547Sglebiusm_pkthdr_init(struct mbuf *m, int how)
326295547Sglebius{
327295547Sglebius#ifdef MAC
328295547Sglebius	int error;
329295547Sglebius#endif
330295547Sglebius	m->m_data = m->m_pktdat;
331295547Sglebius	bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
332295547Sglebius#ifdef MAC
333295547Sglebius	/* If the label init fails, fail the alloc */
334295547Sglebius	error = mac_mbuf_init(m, how);
335295547Sglebius	if (error)
336295547Sglebius		return (error);
337295547Sglebius#endif
338149599Sandre
339295547Sglebius	return (0);
340295547Sglebius}
341295547Sglebius
342149599Sandre/*
343108466Ssam * "Move" mbuf pkthdr from "from" to "to".
344100960Srwatson * "from" must have M_PKTHDR set, and "to" must be empty.
345100960Srwatson */
346100960Srwatsonvoid
347108466Ssamm_move_pkthdr(struct mbuf *to, struct mbuf *from)
348100960Srwatson{
349100960Srwatson
350100960Srwatson#if 0
351108466Ssam	/* see below for why these are not enabled */
352113255Sdes	M_ASSERTPKTHDR(to);
353113487Srwatson	/* Note: with MAC, this may not be a good assertion. */
354108466Ssam	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
355108466Ssam	    ("m_move_pkthdr: to has tags"));
356100960Srwatson#endif
357101007Srwatson#ifdef MAC
358113487Srwatson	/*
359113487Srwatson	 * XXXMAC: It could be this should also occur for non-MAC?
360113487Srwatson	 */
361101007Srwatson	if (to->m_flags & M_PKTHDR)
362113487Srwatson		m_tag_delete_chain(to, NULL);
363101007Srwatson#endif
364143302Ssam	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
365143302Ssam	if ((to->m_flags & M_EXT) == 0)
366143302Ssam		to->m_data = to->m_pktdat;
367108466Ssam	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
368108466Ssam	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
369108466Ssam	from->m_flags &= ~M_PKTHDR;
370108466Ssam}
371108466Ssam
372108466Ssam/*
373108466Ssam * Duplicate "from"'s mbuf pkthdr in "to".
374108466Ssam * "from" must have M_PKTHDR set, and "to" must be empty.
375108466Ssam * In particular, this does a deep copy of the packet tags.
376108466Ssam */
377108466Ssamint
378286450Smelifarom_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
379108466Ssam{
380108466Ssam
381108466Ssam#if 0
382108466Ssam	/*
383108466Ssam	 * The mbuf allocator only initializes the pkthdr
384248372Sglebius	 * when the mbuf is allocated with m_gethdr(). Many users
385248372Sglebius	 * (e.g. m_copy*, m_prepend) use m_get() and then
386108466Ssam	 * smash the pkthdr as needed causing these
387108466Ssam	 * assertions to trip.  For now just disable them.
388108466Ssam	 */
389113255Sdes	M_ASSERTPKTHDR(to);
390113487Srwatson	/* Note: with MAC, this may not be a good assertion. */
391108466Ssam	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
392108466Ssam#endif
393132488Salfred	MBUF_CHECKSLEEP(how);
394108466Ssam#ifdef MAC
395108466Ssam	if (to->m_flags & M_PKTHDR)
396113487Srwatson		m_tag_delete_chain(to, NULL);
397108466Ssam#endif
398112733Ssilby	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
399112733Ssilby	if ((to->m_flags & M_EXT) == 0)
400112733Ssilby		to->m_data = to->m_pktdat;
401100960Srwatson	to->m_pkthdr = from->m_pkthdr;
402108466Ssam	SLIST_INIT(&to->m_pkthdr.tags);
403260716Sglebius	return (m_tag_copy_chain(to, from, how));
404100960Srwatson}
405100960Srwatson
406100960Srwatson/*
4071541Srgrimes * Lesser-used path for M_PREPEND:
4081541Srgrimes * allocate new mbuf to prepend to chain,
4091541Srgrimes * copy junk along.
4101541Srgrimes */
4111541Srgrimesstruct mbuf *
41272356Sbmilekicm_prepend(struct mbuf *m, int len, int how)
4131541Srgrimes{
4141541Srgrimes	struct mbuf *mn;
4151541Srgrimes
416117770Ssilby	if (m->m_flags & M_PKTHDR)
417248372Sglebius		mn = m_gethdr(how, m->m_type);
418117770Ssilby	else
419248372Sglebius		mn = m_get(how, m->m_type);
42072356Sbmilekic	if (mn == NULL) {
4211541Srgrimes		m_freem(m);
42272356Sbmilekic		return (NULL);
4231541Srgrimes	}
424113487Srwatson	if (m->m_flags & M_PKTHDR)
425248372Sglebius		m_move_pkthdr(mn, m);
4261541Srgrimes	mn->m_next = m;
4271541Srgrimes	m = mn;
428276692Srwatson	if (len < M_SIZE(m))
429276692Srwatson		M_ALIGN(m, len);
4301541Srgrimes	m->m_len = len;
4311541Srgrimes	return (m);
4321541Srgrimes}
4331541Srgrimes
4341541Srgrimes/*
4351541Srgrimes * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
4361541Srgrimes * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
437243882Sglebius * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
43854002Sarchie * Note that the copy is read-only, because clusters are not copied,
43954002Sarchie * only their reference counts are incremented.
4401541Srgrimes */
4411541Srgrimesstruct mbuf *
442296242Sglebiusm_copym(struct mbuf *m, int off0, int len, int wait)
4431541Srgrimes{
44472356Sbmilekic	struct mbuf *n, **np;
44572356Sbmilekic	int off = off0;
4461541Srgrimes	struct mbuf *top;
4471541Srgrimes	int copyhdr = 0;
4481541Srgrimes
44952201Salfred	KASSERT(off >= 0, ("m_copym, negative off %d", off));
45052201Salfred	KASSERT(len >= 0, ("m_copym, negative len %d", len));
451132488Salfred	MBUF_CHECKSLEEP(wait);
4521541Srgrimes	if (off == 0 && m->m_flags & M_PKTHDR)
4531541Srgrimes		copyhdr = 1;
4541541Srgrimes	while (off > 0) {
45552201Salfred		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
4561541Srgrimes		if (off < m->m_len)
4571541Srgrimes			break;
4581541Srgrimes		off -= m->m_len;
4591541Srgrimes		m = m->m_next;
4601541Srgrimes	}
4611541Srgrimes	np = &top;
4621541Srgrimes	top = 0;
4631541Srgrimes	while (len > 0) {
46472356Sbmilekic		if (m == NULL) {
465266876Sglebius			KASSERT(len == M_COPYALL,
46652201Salfred			    ("m_copym, length > size of mbuf chain"));
4671541Srgrimes			break;
4681541Srgrimes		}
469117770Ssilby		if (copyhdr)
470248372Sglebius			n = m_gethdr(wait, m->m_type);
471117770Ssilby		else
472248372Sglebius			n = m_get(wait, m->m_type);
4731541Srgrimes		*np = n;
47472356Sbmilekic		if (n == NULL)
4751541Srgrimes			goto nospace;
4761541Srgrimes		if (copyhdr) {
477108466Ssam			if (!m_dup_pkthdr(n, m, wait))
478108466Ssam				goto nospace;
4791541Srgrimes			if (len == M_COPYALL)
4801541Srgrimes				n->m_pkthdr.len -= off0;
4811541Srgrimes			else
4821541Srgrimes				n->m_pkthdr.len = len;
4831541Srgrimes			copyhdr = 0;
4841541Srgrimes		}
4851541Srgrimes		n->m_len = min(len, m->m_len - off);
4861541Srgrimes		if (m->m_flags & M_EXT) {
4871541Srgrimes			n->m_data = m->m_data + off;
488151976Sandre			mb_dupcl(n, m);
4891541Srgrimes		} else
4901541Srgrimes			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
491103569Sbmilekic			    (u_int)n->m_len);
4921541Srgrimes		if (len != M_COPYALL)
4931541Srgrimes			len -= n->m_len;
4941541Srgrimes		off = 0;
4951541Srgrimes		m = m->m_next;
4961541Srgrimes		np = &n->m_next;
4971541Srgrimes	}
49878592Sbmilekic
4991541Srgrimes	return (top);
5001541Srgrimesnospace:
5011541Srgrimes	m_freem(top);
50272356Sbmilekic	return (NULL);
5031541Srgrimes}
5041541Srgrimes
5051541Srgrimes/*
50615689Swollman * Copy an entire packet, including header (which must be present).
50715689Swollman * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
50854002Sarchie * Note that the copy is read-only, because clusters are not copied,
50954002Sarchie * only their reference counts are incremented.
51072750Sluigi * Preserve alignment of the first mbuf so if the creator has left
51172750Sluigi * some room at the beginning (e.g. for inserting protocol headers)
51272750Sluigi * the copies still have the room available.
51315689Swollman */
51415689Swollmanstruct mbuf *
51572356Sbmilekicm_copypacket(struct mbuf *m, int how)
51615689Swollman{
51715689Swollman	struct mbuf *top, *n, *o;
51815689Swollman
519132488Salfred	MBUF_CHECKSLEEP(how);
520248372Sglebius	n = m_get(how, m->m_type);
52115689Swollman	top = n;
52272356Sbmilekic	if (n == NULL)
52315689Swollman		goto nospace;
52415689Swollman
525108466Ssam	if (!m_dup_pkthdr(n, m, how))
526108466Ssam		goto nospace;
52715689Swollman	n->m_len = m->m_len;
52815689Swollman	if (m->m_flags & M_EXT) {
52915689Swollman		n->m_data = m->m_data;
530151976Sandre		mb_dupcl(n, m);
53115689Swollman	} else {
53272750Sluigi		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
53315689Swollman		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
53415689Swollman	}
53515689Swollman
53615689Swollman	m = m->m_next;
53715689Swollman	while (m) {
538248372Sglebius		o = m_get(how, m->m_type);
53972356Sbmilekic		if (o == NULL)
54015689Swollman			goto nospace;
54115689Swollman
54215689Swollman		n->m_next = o;
54315689Swollman		n = n->m_next;
54415689Swollman
54515689Swollman		n->m_len = m->m_len;
54615689Swollman		if (m->m_flags & M_EXT) {
54715689Swollman			n->m_data = m->m_data;
548151976Sandre			mb_dupcl(n, m);
54915689Swollman		} else {
55015689Swollman			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
55115689Swollman		}
55215689Swollman
55315689Swollman		m = m->m_next;
55415689Swollman	}
55515689Swollman	return top;
55615689Swollmannospace:
55715689Swollman	m_freem(top);
55872356Sbmilekic	return (NULL);
55915689Swollman}
56015689Swollman
56115689Swollman/*
5621541Srgrimes * Copy data from an mbuf chain starting "off" bytes from the beginning,
5631541Srgrimes * continuing for "len" bytes, into the indicated buffer.
5641541Srgrimes */
5651549Srgrimesvoid
56681907Sjulianm_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
5671541Srgrimes{
568103569Sbmilekic	u_int count;
5691541Srgrimes
57052201Salfred	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
57152201Salfred	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
5721541Srgrimes	while (off > 0) {
57352201Salfred		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
5741541Srgrimes		if (off < m->m_len)
5751541Srgrimes			break;
5761541Srgrimes		off -= m->m_len;
5771541Srgrimes		m = m->m_next;
5781541Srgrimes	}
5791541Srgrimes	while (len > 0) {
58052201Salfred		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
5811541Srgrimes		count = min(m->m_len - off, len);
5821541Srgrimes		bcopy(mtod(m, caddr_t) + off, cp, count);
5831541Srgrimes		len -= count;
5841541Srgrimes		cp += count;
5851541Srgrimes		off = 0;
5861541Srgrimes		m = m->m_next;
5871541Srgrimes	}
5881541Srgrimes}
5891541Srgrimes
5901541Srgrimes/*
59154002Sarchie * Copy a packet header mbuf chain into a completely new chain, including
59254002Sarchie * copying any mbuf clusters.  Use this instead of m_copypacket() when
59354002Sarchie * you need a writable copy of an mbuf chain.
59454002Sarchie */
59554002Sarchiestruct mbuf *
596286450Smelifarom_dup(const struct mbuf *m, int how)
59754002Sarchie{
59854002Sarchie	struct mbuf **p, *top = NULL;
59954002Sarchie	int remain, moff, nsize;
60054002Sarchie
601132488Salfred	MBUF_CHECKSLEEP(how);
60254002Sarchie	/* Sanity check */
60354002Sarchie	if (m == NULL)
60472356Sbmilekic		return (NULL);
605113255Sdes	M_ASSERTPKTHDR(m);
60654002Sarchie
60754002Sarchie	/* While there's more data, get a new mbuf, tack it on, and fill it */
60854002Sarchie	remain = m->m_pkthdr.len;
60954002Sarchie	moff = 0;
61054002Sarchie	p = &top;
61154002Sarchie	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
61254002Sarchie		struct mbuf *n;
61354002Sarchie
61454002Sarchie		/* Get the next new mbuf */
615129906Sbmilekic		if (remain >= MINCLSIZE) {
616129906Sbmilekic			n = m_getcl(how, m->m_type, 0);
617129906Sbmilekic			nsize = MCLBYTES;
618129906Sbmilekic		} else {
619129906Sbmilekic			n = m_get(how, m->m_type);
620129906Sbmilekic			nsize = MLEN;
621129906Sbmilekic		}
62254002Sarchie		if (n == NULL)
62354002Sarchie			goto nospace;
624129906Sbmilekic
625129906Sbmilekic		if (top == NULL) {		/* First one, must be PKTHDR */
626129906Sbmilekic			if (!m_dup_pkthdr(n, m, how)) {
627129906Sbmilekic				m_free(n);
628108466Ssam				goto nospace;
629129906Sbmilekic			}
630153428Semaste			if ((n->m_flags & M_EXT) == 0)
631153428Semaste				nsize = MHLEN;
632282594Sae			n->m_flags &= ~M_RDONLY;
63354002Sarchie		}
63454002Sarchie		n->m_len = 0;
63554002Sarchie
63654002Sarchie		/* Link it into the new chain */
63754002Sarchie		*p = n;
63854002Sarchie		p = &n->m_next;
63954002Sarchie
64054002Sarchie		/* Copy data from original mbuf(s) into new mbuf */
64154002Sarchie		while (n->m_len < nsize && m != NULL) {
64254002Sarchie			int chunk = min(nsize - n->m_len, m->m_len - moff);
64354002Sarchie
64454002Sarchie			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
64554002Sarchie			moff += chunk;
64654002Sarchie			n->m_len += chunk;
64754002Sarchie			remain -= chunk;
64854002Sarchie			if (moff == m->m_len) {
64954002Sarchie				m = m->m_next;
65054002Sarchie				moff = 0;
65154002Sarchie			}
65254002Sarchie		}
65354002Sarchie
65454002Sarchie		/* Check correct total mbuf length */
65554002Sarchie		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
65687594Sobrien		    	("%s: bogus m_pkthdr.len", __func__));
65754002Sarchie	}
65854002Sarchie	return (top);
65954002Sarchie
66054002Sarchienospace:
66154002Sarchie	m_freem(top);
66272356Sbmilekic	return (NULL);
66354002Sarchie}
66454002Sarchie
66554002Sarchie/*
6661541Srgrimes * Concatenate mbuf chain n to m.
6671541Srgrimes * Both chains must be of the same type (e.g. MT_DATA).
6681541Srgrimes * Any m_pkthdr is not updated.
6691541Srgrimes */
6701549Srgrimesvoid
67172356Sbmilekicm_cat(struct mbuf *m, struct mbuf *n)
6721541Srgrimes{
6731541Srgrimes	while (m->m_next)
6741541Srgrimes		m = m->m_next;
6751541Srgrimes	while (n) {
676242256Sandre		if (!M_WRITABLE(m) ||
677242256Sandre		    M_TRAILINGSPACE(m) < n->m_len) {
6781541Srgrimes			/* just join the two chains */
6791541Srgrimes			m->m_next = n;
6801541Srgrimes			return;
6811541Srgrimes		}
6821541Srgrimes		/* splat the data from one into the other */
6831541Srgrimes		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
6841541Srgrimes		    (u_int)n->m_len);
6851541Srgrimes		m->m_len += n->m_len;
6861541Srgrimes		n = m_free(n);
6871541Srgrimes	}
6881541Srgrimes}
6891541Srgrimes
690271088Sglebius/*
691271088Sglebius * Concatenate two pkthdr mbuf chains.
692271088Sglebius */
6931549Srgrimesvoid
694271088Sglebiusm_catpkt(struct mbuf *m, struct mbuf *n)
695271088Sglebius{
696271088Sglebius
697271088Sglebius	M_ASSERTPKTHDR(m);
698271088Sglebius	M_ASSERTPKTHDR(n);
699271088Sglebius
700271088Sglebius	m->m_pkthdr.len += n->m_pkthdr.len;
701275329Sglebius	m_demote(n, 1, 0);
702271088Sglebius
703271088Sglebius	m_cat(m, n);
704271088Sglebius}
705271088Sglebius
706271088Sglebiusvoid
70772356Sbmilekicm_adj(struct mbuf *mp, int req_len)
7081541Srgrimes{
70972356Sbmilekic	int len = req_len;
71072356Sbmilekic	struct mbuf *m;
71172356Sbmilekic	int count;
7121541Srgrimes
7131541Srgrimes	if ((m = mp) == NULL)
7141541Srgrimes		return;
7151541Srgrimes	if (len >= 0) {
7161541Srgrimes		/*
7171541Srgrimes		 * Trim from head.
7181541Srgrimes		 */
7191541Srgrimes		while (m != NULL && len > 0) {
7201541Srgrimes			if (m->m_len <= len) {
7211541Srgrimes				len -= m->m_len;
7221541Srgrimes				m->m_len = 0;
7231541Srgrimes				m = m->m_next;
7241541Srgrimes			} else {
7251541Srgrimes				m->m_len -= len;
7261541Srgrimes				m->m_data += len;
7271541Srgrimes				len = 0;
7281541Srgrimes			}
7291541Srgrimes		}
7301541Srgrimes		if (mp->m_flags & M_PKTHDR)
731207475Szec			mp->m_pkthdr.len -= (req_len - len);
7321541Srgrimes	} else {
7331541Srgrimes		/*
7341541Srgrimes		 * Trim from tail.  Scan the mbuf chain,
7351541Srgrimes		 * calculating its length and finding the last mbuf.
7361541Srgrimes		 * If the adjustment only affects this mbuf, then just
7371541Srgrimes		 * adjust and return.  Otherwise, rescan and truncate
7381541Srgrimes		 * after the remaining size.
7391541Srgrimes		 */
7401541Srgrimes		len = -len;
7411541Srgrimes		count = 0;
7421541Srgrimes		for (;;) {
7431541Srgrimes			count += m->m_len;
7441541Srgrimes			if (m->m_next == (struct mbuf *)0)
7451541Srgrimes				break;
7461541Srgrimes			m = m->m_next;
7471541Srgrimes		}
7481541Srgrimes		if (m->m_len >= len) {
7491541Srgrimes			m->m_len -= len;
7501541Srgrimes			if (mp->m_flags & M_PKTHDR)
7511541Srgrimes				mp->m_pkthdr.len -= len;
7521541Srgrimes			return;
7531541Srgrimes		}
7541541Srgrimes		count -= len;
7551541Srgrimes		if (count < 0)
7561541Srgrimes			count = 0;
7571541Srgrimes		/*
7581541Srgrimes		 * Correct length for chain is "count".
7591541Srgrimes		 * Find the mbuf with last data, adjust its length,
7601541Srgrimes		 * and toss data from remaining mbufs on chain.
7611541Srgrimes		 */
7621541Srgrimes		m = mp;
7631541Srgrimes		if (m->m_flags & M_PKTHDR)
7641541Srgrimes			m->m_pkthdr.len = count;
7651541Srgrimes		for (; m; m = m->m_next) {
7661541Srgrimes			if (m->m_len >= count) {
7671541Srgrimes				m->m_len = count;
768142350Ssam				if (m->m_next != NULL) {
769142350Ssam					m_freem(m->m_next);
770142350Ssam					m->m_next = NULL;
771142350Ssam				}
7721541Srgrimes				break;
7731541Srgrimes			}
7741541Srgrimes			count -= m->m_len;
7751541Srgrimes		}
7761541Srgrimes	}
7771541Srgrimes}
7781541Srgrimes
7791541Srgrimes/*
 * Rearrange an mbuf chain so that len bytes are contiguous
781242304Skevlo * and in the data area of an mbuf (so that mtod will work
782242304Skevlo * for a structure of size len).  Returns the resulting
7831541Srgrimes * mbuf chain on success, frees it and returns null on failure.
7841541Srgrimes * If there is room, it will add up to max_protohdr-len extra bytes to the
7851541Srgrimes * contiguous region in an attempt to avoid being called next time.
7861541Srgrimes */
7871541Srgrimesstruct mbuf *
78872356Sbmilekicm_pullup(struct mbuf *n, int len)
7891541Srgrimes{
79072356Sbmilekic	struct mbuf *m;
79172356Sbmilekic	int count;
7921541Srgrimes	int space;
7931541Srgrimes
7941541Srgrimes	/*
7951541Srgrimes	 * If first mbuf has no cluster, and has room for len bytes
7961541Srgrimes	 * without shifting current data, pullup into it,
7971541Srgrimes	 * otherwise allocate a new mbuf to prepend to the chain.
7981541Srgrimes	 */
7991541Srgrimes	if ((n->m_flags & M_EXT) == 0 &&
8001541Srgrimes	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
8011541Srgrimes		if (n->m_len >= len)
8021541Srgrimes			return (n);
8031541Srgrimes		m = n;
8041541Srgrimes		n = n->m_next;
8051541Srgrimes		len -= m->m_len;
8061541Srgrimes	} else {
8071541Srgrimes		if (len > MHLEN)
8081541Srgrimes			goto bad;
809248372Sglebius		m = m_get(M_NOWAIT, n->m_type);
81072356Sbmilekic		if (m == NULL)
8111541Srgrimes			goto bad;
812108466Ssam		if (n->m_flags & M_PKTHDR)
813248372Sglebius			m_move_pkthdr(m, n);
8141541Srgrimes	}
8151541Srgrimes	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
8161541Srgrimes	do {
8171541Srgrimes		count = min(min(max(len, max_protohdr), space), n->m_len);
8181541Srgrimes		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
819103569Sbmilekic		  (u_int)count);
8201541Srgrimes		len -= count;
8211541Srgrimes		m->m_len += count;
8221541Srgrimes		n->m_len -= count;
8231541Srgrimes		space -= count;
8241541Srgrimes		if (n->m_len)
8251541Srgrimes			n->m_data += count;
8261541Srgrimes		else
8271541Srgrimes			n = m_free(n);
8281541Srgrimes	} while (len > 0 && n);
8291541Srgrimes	if (len > 0) {
8301541Srgrimes		(void) m_free(m);
8311541Srgrimes		goto bad;
8321541Srgrimes	}
8331541Srgrimes	m->m_next = n;
8341541Srgrimes	return (m);
8351541Srgrimesbad:
8361541Srgrimes	m_freem(n);
83772356Sbmilekic	return (NULL);
8381541Srgrimes}
8391541Srgrimes
8401541Srgrimes/*
841143761Sjmg * Like m_pullup(), except a new mbuf is always allocated, and we allow
842143761Sjmg * the amount of empty space before the data in the new mbuf to be specified
843143761Sjmg * (in the event that the caller expects to prepend later).
844143761Sjmg */
845143761Sjmgstruct mbuf *
846143761Sjmgm_copyup(struct mbuf *n, int len, int dstoff)
847143761Sjmg{
848143761Sjmg	struct mbuf *m;
849143761Sjmg	int count, space;
850143761Sjmg
851143761Sjmg	if (len > (MHLEN - dstoff))
852143761Sjmg		goto bad;
853248372Sglebius	m = m_get(M_NOWAIT, n->m_type);
854143761Sjmg	if (m == NULL)
855143761Sjmg		goto bad;
856143761Sjmg	if (n->m_flags & M_PKTHDR)
857248372Sglebius		m_move_pkthdr(m, n);
858143761Sjmg	m->m_data += dstoff;
859143761Sjmg	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
860143761Sjmg	do {
861143761Sjmg		count = min(min(max(len, max_protohdr), space), n->m_len);
862143761Sjmg		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
863143761Sjmg		    (unsigned)count);
864143761Sjmg		len -= count;
865143761Sjmg		m->m_len += count;
866143761Sjmg		n->m_len -= count;
867143761Sjmg		space -= count;
868143761Sjmg		if (n->m_len)
869143761Sjmg			n->m_data += count;
870143761Sjmg		else
871143761Sjmg			n = m_free(n);
872143761Sjmg	} while (len > 0 && n);
873143761Sjmg	if (len > 0) {
874143761Sjmg		(void) m_free(m);
875143761Sjmg		goto bad;
876143761Sjmg	}
877143761Sjmg	m->m_next = n;
878143761Sjmg	return (m);
879143761Sjmg bad:
880143761Sjmg	m_freem(n);
881143761Sjmg	return (NULL);
882143761Sjmg}
883143761Sjmg
884143761Sjmg/*
8851541Srgrimes * Partition an mbuf chain in two pieces, returning the tail --
8861541Srgrimes * all but the first len0 bytes.  In case of failure, it returns NULL and
8871541Srgrimes * attempts to restore the chain to its original state.
88897681Sarchie *
88997681Sarchie * Note that the resulting mbufs might be read-only, because the new
89097681Sarchie * mbuf can end up sharing an mbuf cluster with the original mbuf if
89197681Sarchie * the "breaking point" happens to lie within a cluster mbuf. Use the
89297681Sarchie * M_WRITABLE() macro to check for this case.
8931541Srgrimes */
8941541Srgrimesstruct mbuf *
89572356Sbmilekicm_split(struct mbuf *m0, int len0, int wait)
8961541Srgrimes{
89772356Sbmilekic	struct mbuf *m, *n;
898103569Sbmilekic	u_int len = len0, remain;
8991541Srgrimes
900132488Salfred	MBUF_CHECKSLEEP(wait);
9011541Srgrimes	for (m = m0; m && len > m->m_len; m = m->m_next)
9021541Srgrimes		len -= m->m_len;
90372356Sbmilekic	if (m == NULL)
90472356Sbmilekic		return (NULL);
9051541Srgrimes	remain = m->m_len - len;
906248887Sglebius	if (m0->m_flags & M_PKTHDR && remain == 0) {
907248372Sglebius		n = m_gethdr(wait, m0->m_type);
908258128Sglebius		if (n == NULL)
909248887Sglebius			return (NULL);
910248887Sglebius		n->m_next = m->m_next;
911248887Sglebius		m->m_next = NULL;
912248887Sglebius		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
913248887Sglebius		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
914248887Sglebius		m0->m_pkthdr.len = len0;
915248887Sglebius		return (n);
916248887Sglebius	} else if (m0->m_flags & M_PKTHDR) {
917248887Sglebius		n = m_gethdr(wait, m0->m_type);
91872356Sbmilekic		if (n == NULL)
91972356Sbmilekic			return (NULL);
9201541Srgrimes		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9211541Srgrimes		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
9221541Srgrimes		m0->m_pkthdr.len = len0;
9231541Srgrimes		if (m->m_flags & M_EXT)
9241541Srgrimes			goto extpacket;
9251541Srgrimes		if (remain > MHLEN) {
9261541Srgrimes			/* m can't be the lead packet */
927276692Srwatson			M_ALIGN(n, 0);
9281541Srgrimes			n->m_next = m_split(m, len, wait);
92972356Sbmilekic			if (n->m_next == NULL) {
9301541Srgrimes				(void) m_free(n);
93172356Sbmilekic				return (NULL);
93294471Shsu			} else {
93394471Shsu				n->m_len = 0;
9341541Srgrimes				return (n);
93594471Shsu			}
9361541Srgrimes		} else
937276692Srwatson			M_ALIGN(n, remain);
9381541Srgrimes	} else if (remain == 0) {
9391541Srgrimes		n = m->m_next;
94072356Sbmilekic		m->m_next = NULL;
9411541Srgrimes		return (n);
9421541Srgrimes	} else {
943248372Sglebius		n = m_get(wait, m->m_type);
94472356Sbmilekic		if (n == NULL)
94572356Sbmilekic			return (NULL);
9461541Srgrimes		M_ALIGN(n, remain);
9471541Srgrimes	}
9481541Srgrimesextpacket:
9491541Srgrimes	if (m->m_flags & M_EXT) {
9501541Srgrimes		n->m_data = m->m_data + len;
951151976Sandre		mb_dupcl(n, m);
9521541Srgrimes	} else {
9531541Srgrimes		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
9541541Srgrimes	}
9551541Srgrimes	n->m_len = remain;
9561541Srgrimes	m->m_len = len;
9571541Srgrimes	n->m_next = m->m_next;
95872356Sbmilekic	m->m_next = NULL;
9591541Srgrimes	return (n);
9601541Srgrimes}
9611541Srgrimes/*
9621541Srgrimes * Routine to copy from device local memory into mbufs.
96378508Sbmilekic * Note that `off' argument is offset into first mbuf of target chain from
96478508Sbmilekic * which to begin copying the data to.
9651541Srgrimes */
9661541Srgrimesstruct mbuf *
96778508Sbmilekicm_devget(char *buf, int totlen, int off, struct ifnet *ifp,
968169624Srwatson    void (*copy)(char *from, caddr_t to, u_int len))
9691541Srgrimes{
97072356Sbmilekic	struct mbuf *m;
971129906Sbmilekic	struct mbuf *top = NULL, **mp = &top;
97278508Sbmilekic	int len;
9731541Srgrimes
97478508Sbmilekic	if (off < 0 || off > MHLEN)
97578508Sbmilekic		return (NULL);
97678508Sbmilekic
977129906Sbmilekic	while (totlen > 0) {
978129906Sbmilekic		if (top == NULL) {	/* First one, must be PKTHDR */
979129906Sbmilekic			if (totlen + off >= MINCLSIZE) {
980243882Sglebius				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
981129906Sbmilekic				len = MCLBYTES;
982129906Sbmilekic			} else {
983243882Sglebius				m = m_gethdr(M_NOWAIT, MT_DATA);
984129906Sbmilekic				len = MHLEN;
9851541Srgrimes
986129906Sbmilekic				/* Place initial small packet/header at end of mbuf */
987129906Sbmilekic				if (m && totlen + off + max_linkhdr <= MLEN) {
988129906Sbmilekic					m->m_data += max_linkhdr;
989129906Sbmilekic					len -= max_linkhdr;
990129906Sbmilekic				}
991129906Sbmilekic			}
992129906Sbmilekic			if (m == NULL)
993129906Sbmilekic				return NULL;
994129906Sbmilekic			m->m_pkthdr.rcvif = ifp;
995129906Sbmilekic			m->m_pkthdr.len = totlen;
996129906Sbmilekic		} else {
997129906Sbmilekic			if (totlen + off >= MINCLSIZE) {
998243882Sglebius				m = m_getcl(M_NOWAIT, MT_DATA, 0);
999129906Sbmilekic				len = MCLBYTES;
1000129906Sbmilekic			} else {
1001243882Sglebius				m = m_get(M_NOWAIT, MT_DATA);
1002129906Sbmilekic				len = MLEN;
1003129906Sbmilekic			}
100472356Sbmilekic			if (m == NULL) {
10051541Srgrimes				m_freem(top);
1006129906Sbmilekic				return NULL;
10071541Srgrimes			}
10081541Srgrimes		}
100978508Sbmilekic		if (off) {
101078508Sbmilekic			m->m_data += off;
101178508Sbmilekic			len -= off;
101278508Sbmilekic			off = 0;
101378508Sbmilekic		}
101478508Sbmilekic		m->m_len = len = min(totlen, len);
10151541Srgrimes		if (copy)
1016103569Sbmilekic			copy(buf, mtod(m, caddr_t), (u_int)len);
10171541Srgrimes		else
1018103569Sbmilekic			bcopy(buf, mtod(m, caddr_t), (u_int)len);
101978508Sbmilekic		buf += len;
10201541Srgrimes		*mp = m;
10211541Srgrimes		mp = &m->m_next;
10221541Srgrimes		totlen -= len;
10231541Srgrimes	}
10241541Srgrimes	return (top);
10251541Srgrimes}
10263352Sphk
10273352Sphk/*
10283352Sphk * Copy data from a buffer back into the indicated mbuf chain,
10293352Sphk * starting "off" bytes from the beginning, extending the mbuf
10303352Sphk * chain if necessary.
10313352Sphk */
10323352Sphkvoid
1033128402Sluigim_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
10343352Sphk{
103572356Sbmilekic	int mlen;
103672356Sbmilekic	struct mbuf *m = m0, *n;
10373352Sphk	int totlen = 0;
10383352Sphk
103972356Sbmilekic	if (m0 == NULL)
10403352Sphk		return;
10413352Sphk	while (off > (mlen = m->m_len)) {
10423352Sphk		off -= mlen;
10433352Sphk		totlen += mlen;
104472356Sbmilekic		if (m->m_next == NULL) {
1045243882Sglebius			n = m_get(M_NOWAIT, m->m_type);
104672356Sbmilekic			if (n == NULL)
10473352Sphk				goto out;
1048129906Sbmilekic			bzero(mtod(n, caddr_t), MLEN);
10493352Sphk			n->m_len = min(MLEN, len + off);
10503352Sphk			m->m_next = n;
10513352Sphk		}
10523352Sphk		m = m->m_next;
10533352Sphk	}
10543352Sphk	while (len > 0) {
1055187409Smav		if (m->m_next == NULL && (len > m->m_len - off)) {
1056187409Smav			m->m_len += min(len - (m->m_len - off),
1057187409Smav			    M_TRAILINGSPACE(m));
1058187409Smav		}
10593352Sphk		mlen = min (m->m_len - off, len);
1060103569Sbmilekic		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
10613352Sphk		cp += mlen;
10623352Sphk		len -= mlen;
10633352Sphk		mlen += off;
10643352Sphk		off = 0;
10653352Sphk		totlen += mlen;
10663352Sphk		if (len == 0)
10673352Sphk			break;
106872356Sbmilekic		if (m->m_next == NULL) {
1069243882Sglebius			n = m_get(M_NOWAIT, m->m_type);
107072356Sbmilekic			if (n == NULL)
10713352Sphk				break;
10723352Sphk			n->m_len = min(MLEN, len);
10733352Sphk			m->m_next = n;
10743352Sphk		}
10753352Sphk		m = m->m_next;
10763352Sphk	}
10773352Sphkout:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
10783352Sphk		m->m_pkthdr.len = totlen;
10793352Sphk}
108052756Sphk
1081123557Sbms/*
1082138541Ssam * Append the specified data to the indicated mbuf chain,
1083138541Ssam * Extend the mbuf chain if the new data does not fit in
1084138541Ssam * existing space.
1085138541Ssam *
1086138541Ssam * Return 1 if able to complete the job; otherwise 0.
1087138541Ssam */
1088138541Ssamint
1089138541Ssamm_append(struct mbuf *m0, int len, c_caddr_t cp)
1090138541Ssam{
1091138541Ssam	struct mbuf *m, *n;
1092138541Ssam	int remainder, space;
1093138541Ssam
1094138541Ssam	for (m = m0; m->m_next != NULL; m = m->m_next)
1095138541Ssam		;
1096138541Ssam	remainder = len;
1097138541Ssam	space = M_TRAILINGSPACE(m);
1098138541Ssam	if (space > 0) {
1099138541Ssam		/*
1100138541Ssam		 * Copy into available space.
1101138541Ssam		 */
1102138541Ssam		if (space > remainder)
1103138541Ssam			space = remainder;
1104138541Ssam		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
1105138541Ssam		m->m_len += space;
1106138541Ssam		cp += space, remainder -= space;
1107138541Ssam	}
1108138541Ssam	while (remainder > 0) {
1109138541Ssam		/*
1110138541Ssam		 * Allocate a new mbuf; could check space
1111138541Ssam		 * and allocate a cluster instead.
1112138541Ssam		 */
1113243882Sglebius		n = m_get(M_NOWAIT, m->m_type);
1114138541Ssam		if (n == NULL)
1115138541Ssam			break;
1116138541Ssam		n->m_len = min(MLEN, remainder);
1117138894Ssam		bcopy(cp, mtod(n, caddr_t), n->m_len);
1118138894Ssam		cp += n->m_len, remainder -= n->m_len;
1119138541Ssam		m->m_next = n;
1120138541Ssam		m = n;
1121138541Ssam	}
1122138541Ssam	if (m0->m_flags & M_PKTHDR)
1123138541Ssam		m0->m_pkthdr.len += len - remainder;
1124138541Ssam	return (remainder == 0);
1125138541Ssam}
1126138541Ssam
1127138541Ssam/*
1128123557Sbms * Apply function f to the data in an mbuf chain starting "off" bytes from
1129123557Sbms * the beginning, continuing for "len" bytes.
1130123557Sbms */
1131123557Sbmsint
1132123557Sbmsm_apply(struct mbuf *m, int off, int len,
1133123564Sbms    int (*f)(void *, void *, u_int), void *arg)
1134123557Sbms{
1135123564Sbms	u_int count;
1136123557Sbms	int rval;
1137123557Sbms
1138123557Sbms	KASSERT(off >= 0, ("m_apply, negative off %d", off));
1139123557Sbms	KASSERT(len >= 0, ("m_apply, negative len %d", len));
1140123557Sbms	while (off > 0) {
1141123557Sbms		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
1142123557Sbms		if (off < m->m_len)
1143123557Sbms			break;
1144123557Sbms		off -= m->m_len;
1145123557Sbms		m = m->m_next;
1146123557Sbms	}
1147123557Sbms	while (len > 0) {
1148123557Sbms		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
1149123557Sbms		count = min(m->m_len - off, len);
1150123557Sbms		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
1151123557Sbms		if (rval)
1152123557Sbms			return (rval);
1153123557Sbms		len -= count;
1154123557Sbms		off = 0;
1155123557Sbms		m = m->m_next;
1156123557Sbms	}
1157123557Sbms	return (0);
1158123557Sbms}
1159123557Sbms
1160123557Sbms/*
1161123557Sbms * Return a pointer to mbuf/offset of location in mbuf chain.
1162123557Sbms */
1163123557Sbmsstruct mbuf *
1164123557Sbmsm_getptr(struct mbuf *m, int loc, int *off)
1165123557Sbms{
1166123557Sbms
1167123557Sbms	while (loc >= 0) {
1168123564Sbms		/* Normal end of search. */
1169123557Sbms		if (m->m_len > loc) {
1170123557Sbms			*off = loc;
1171123557Sbms			return (m);
1172123557Sbms		} else {
1173123557Sbms			loc -= m->m_len;
1174123557Sbms			if (m->m_next == NULL) {
1175123557Sbms				if (loc == 0) {
1176123564Sbms					/* Point at the end of valid data. */
1177123557Sbms					*off = m->m_len;
1178123557Sbms					return (m);
1179123564Sbms				}
1180123564Sbms				return (NULL);
1181123564Sbms			}
1182123564Sbms			m = m->m_next;
1183123557Sbms		}
1184123557Sbms	}
1185123557Sbms	return (NULL);
1186123557Sbms}
1187123557Sbms
118852756Sphkvoid
1189135904Sjmgm_print(const struct mbuf *m, int maxlen)
119052756Sphk{
119152756Sphk	int len;
1192135904Sjmg	int pdata;
119354906Seivind	const struct mbuf *m2;
119452756Sphk
1195230587Sken	if (m == NULL) {
1196230587Sken		printf("mbuf: %p\n", m);
1197230587Sken		return;
1198230587Sken	}
1199230587Sken
1200135904Sjmg	if (m->m_flags & M_PKTHDR)
1201135904Sjmg		len = m->m_pkthdr.len;
1202135904Sjmg	else
1203135904Sjmg		len = -1;
120452756Sphk	m2 = m;
1205135904Sjmg	while (m2 != NULL && (len == -1 || len)) {
1206135904Sjmg		pdata = m2->m_len;
1207135904Sjmg		if (maxlen != -1 && pdata > maxlen)
1208135904Sjmg			pdata = maxlen;
1209135904Sjmg		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
1210135904Sjmg		    m2->m_next, m2->m_flags, "\20\20freelist\17skipfw"
1211135904Sjmg		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
1212135904Sjmg		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
1213135904Sjmg		if (pdata)
1214156700Sjmg			printf(", %*D\n", pdata, (u_char *)m2->m_data, "-");
1215135904Sjmg		if (len != -1)
1216135904Sjmg			len -= m2->m_len;
121752756Sphk		m2 = m2->m_next;
121852756Sphk	}
1219135904Sjmg	if (len > 0)
1220135904Sjmg		printf("%d bytes unaccounted for.\n", len);
122152756Sphk	return;
122252756Sphk}
1223103540Sphk
1224103569Sbmilekicu_int
1225103540Sphkm_fixhdr(struct mbuf *m0)
1226103540Sphk{
1227103569Sbmilekic	u_int len;
1228103540Sphk
1229103544Sphk	len = m_length(m0, NULL);
1230103544Sphk	m0->m_pkthdr.len = len;
1231103544Sphk	return (len);
1232103544Sphk}
1233103544Sphk
1234103569Sbmilekicu_int
1235103544Sphkm_length(struct mbuf *m0, struct mbuf **last)
1236103544Sphk{
1237103544Sphk	struct mbuf *m;
1238103569Sbmilekic	u_int len;
1239103544Sphk
1240103544Sphk	len = 0;
1241103544Sphk	for (m = m0; m != NULL; m = m->m_next) {
1242103540Sphk		len += m->m_len;
1243103544Sphk		if (m->m_next == NULL)
1244103544Sphk			break;
1245103540Sphk	}
1246103544Sphk	if (last != NULL)
1247103544Sphk		*last = m;
1248103544Sphk	return (len);
1249103540Sphk}
1250112777Ssilby
1251112777Ssilby/*
1252112777Ssilby * Defragment a mbuf chain, returning the shortest possible
1253112777Ssilby * chain of mbufs and clusters.  If allocation fails and
1254112777Ssilby * this cannot be completed, NULL will be returned, but
1255112777Ssilby * the passed in chain will be unchanged.  Upon success,
1256112777Ssilby * the original chain will be freed, and the new chain
1257112777Ssilby * will be returned.
1258112777Ssilby *
1259112777Ssilby * If a non-packet header is passed in, the original
1260112777Ssilby * mbuf (chain?) will be returned unharmed.
1261112777Ssilby */
1262112777Ssilbystruct mbuf *
1263112777Ssilbym_defrag(struct mbuf *m0, int how)
1264112777Ssilby{
1265125472Ssilby	struct mbuf *m_new = NULL, *m_final = NULL;
1266125472Ssilby	int progress = 0, length;
1267112777Ssilby
1268132488Salfred	MBUF_CHECKSLEEP(how);
1269112777Ssilby	if (!(m0->m_flags & M_PKTHDR))
1270112777Ssilby		return (m0);
1271112777Ssilby
1272117770Ssilby	m_fixhdr(m0); /* Needed sanity check */
1273117770Ssilby
1274113490Ssilby#ifdef MBUF_STRESS_TEST
1275113490Ssilby	if (m_defragrandomfailures) {
1276113490Ssilby		int temp = arc4random() & 0xff;
1277113490Ssilby		if (temp == 0xba)
1278113490Ssilby			goto nospace;
1279113490Ssilby	}
1280113490Ssilby#endif
1281266876Sglebius
1282112777Ssilby	if (m0->m_pkthdr.len > MHLEN)
1283112777Ssilby		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
1284112777Ssilby	else
1285112777Ssilby		m_final = m_gethdr(how, MT_DATA);
1286112777Ssilby
1287112777Ssilby	if (m_final == NULL)
1288112777Ssilby		goto nospace;
1289112777Ssilby
1290123740Speter	if (m_dup_pkthdr(m_final, m0, how) == 0)
1291112777Ssilby		goto nospace;
1292112777Ssilby
1293112777Ssilby	m_new = m_final;
1294112777Ssilby
1295112777Ssilby	while (progress < m0->m_pkthdr.len) {
1296112777Ssilby		length = m0->m_pkthdr.len - progress;
1297112777Ssilby		if (length > MCLBYTES)
1298112777Ssilby			length = MCLBYTES;
1299112777Ssilby
1300112777Ssilby		if (m_new == NULL) {
1301112777Ssilby			if (length > MLEN)
1302112777Ssilby				m_new = m_getcl(how, MT_DATA, 0);
1303112777Ssilby			else
1304112777Ssilby				m_new = m_get(how, MT_DATA);
1305112777Ssilby			if (m_new == NULL)
1306112777Ssilby				goto nospace;
1307112777Ssilby		}
1308112777Ssilby
1309112777Ssilby		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
1310112777Ssilby		progress += length;
1311112777Ssilby		m_new->m_len = length;
1312112777Ssilby		if (m_new != m_final)
1313112777Ssilby			m_cat(m_final, m_new);
1314112777Ssilby		m_new = NULL;
1315112777Ssilby	}
1316116455Ssilby#ifdef MBUF_STRESS_TEST
1317112777Ssilby	if (m0->m_next == NULL)
1318112777Ssilby		m_defraguseless++;
1319116455Ssilby#endif
1320112777Ssilby	m_freem(m0);
1321112777Ssilby	m0 = m_final;
1322116455Ssilby#ifdef MBUF_STRESS_TEST
1323112777Ssilby	m_defragpackets++;
1324112777Ssilby	m_defragbytes += m0->m_pkthdr.len;
1325116455Ssilby#endif
1326112777Ssilby	return (m0);
1327112777Ssilbynospace:
1328116455Ssilby#ifdef MBUF_STRESS_TEST
1329112777Ssilby	m_defragfailure++;
1330116455Ssilby#endif
1331112777Ssilby	if (m_final)
1332112777Ssilby		m_freem(m_final);
1333112777Ssilby	return (NULL);
1334112777Ssilby}
1335119644Ssilby
1336175414Ssam/*
1337175414Ssam * Defragment an mbuf chain, returning at most maxfrags separate
1338175414Ssam * mbufs+clusters.  If this is not possible NULL is returned and
1339175414Ssam * the original mbuf chain is left in it's present (potentially
1340175414Ssam * modified) state.  We use two techniques: collapsing consecutive
1341175414Ssam * mbufs and replacing consecutive mbufs by a cluster.
1342175414Ssam *
1343175414Ssam * NB: this should really be named m_defrag but that name is taken
1344175414Ssam */
1345175414Ssamstruct mbuf *
1346175414Ssamm_collapse(struct mbuf *m0, int how, int maxfrags)
1347175414Ssam{
1348175414Ssam	struct mbuf *m, *n, *n2, **prev;
1349175414Ssam	u_int curfrags;
1350175414Ssam
1351175414Ssam	/*
1352175414Ssam	 * Calculate the current number of frags.
1353175414Ssam	 */
1354175414Ssam	curfrags = 0;
1355175414Ssam	for (m = m0; m != NULL; m = m->m_next)
1356175414Ssam		curfrags++;
1357175414Ssam	/*
1358175414Ssam	 * First, try to collapse mbufs.  Note that we always collapse
1359175414Ssam	 * towards the front so we don't need to deal with moving the
1360175414Ssam	 * pkthdr.  This may be suboptimal if the first mbuf has much
1361175414Ssam	 * less data than the following.
1362175414Ssam	 */
1363175414Ssam	m = m0;
1364175414Ssamagain:
1365175414Ssam	for (;;) {
1366175414Ssam		n = m->m_next;
1367175414Ssam		if (n == NULL)
1368175414Ssam			break;
1369242256Sandre		if (M_WRITABLE(m) &&
1370175414Ssam		    n->m_len < M_TRAILINGSPACE(m)) {
1371175414Ssam			bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
1372175414Ssam				n->m_len);
1373175414Ssam			m->m_len += n->m_len;
1374175414Ssam			m->m_next = n->m_next;
1375175414Ssam			m_free(n);
1376175414Ssam			if (--curfrags <= maxfrags)
1377175414Ssam				return m0;
1378175414Ssam		} else
1379175414Ssam			m = n;
1380175414Ssam	}
1381175414Ssam	KASSERT(maxfrags > 1,
1382175414Ssam		("maxfrags %u, but normal collapse failed", maxfrags));
1383175414Ssam	/*
1384175414Ssam	 * Collapse consecutive mbufs to a cluster.
1385175414Ssam	 */
1386175414Ssam	prev = &m0->m_next;		/* NB: not the first mbuf */
1387175414Ssam	while ((n = *prev) != NULL) {
1388175414Ssam		if ((n2 = n->m_next) != NULL &&
1389175414Ssam		    n->m_len + n2->m_len < MCLBYTES) {
1390175414Ssam			m = m_getcl(how, MT_DATA, 0);
1391175414Ssam			if (m == NULL)
1392175414Ssam				goto bad;
1393175414Ssam			bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
1394175414Ssam			bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
1395175414Ssam				n2->m_len);
1396175414Ssam			m->m_len = n->m_len + n2->m_len;
1397175414Ssam			m->m_next = n2->m_next;
1398175414Ssam			*prev = m;
1399175414Ssam			m_free(n);
1400175414Ssam			m_free(n2);
1401175414Ssam			if (--curfrags <= maxfrags)	/* +1 cl -2 mbufs */
1402175414Ssam				return m0;
1403175414Ssam			/*
1404175414Ssam			 * Still not there, try the normal collapse
1405175414Ssam			 * again before we allocate another cluster.
1406175414Ssam			 */
1407175414Ssam			goto again;
1408175414Ssam		}
1409175414Ssam		prev = &n->m_next;
1410175414Ssam	}
1411175414Ssam	/*
1412175414Ssam	 * No place where we can collapse to a cluster; punt.
1413175414Ssam	 * This can occur if, for example, you request 2 frags
1414175414Ssam	 * but the packet requires that both be clusters (we
1415175414Ssam	 * never reallocate the first mbuf to avoid moving the
1416175414Ssam	 * packet header).
1417175414Ssam	 */
1418175414Ssambad:
1419175414Ssam	return NULL;
1420175414Ssam}
1421175414Ssam
1422119644Ssilby#ifdef MBUF_STRESS_TEST
1423119644Ssilby
1424119644Ssilby/*
1425119644Ssilby * Fragment an mbuf chain.  There's no reason you'd ever want to do
1426119644Ssilby * this in normal usage, but it's great for stress testing various
1427119644Ssilby * mbuf consumers.
1428119644Ssilby *
1429119644Ssilby * If fragmentation is not possible, the original chain will be
1430119644Ssilby * returned.
1431119644Ssilby *
1432119644Ssilby * Possible length values:
1433119644Ssilby * 0	 no fragmentation will occur
1434119644Ssilby * > 0	each fragment will be of the specified length
1435119644Ssilby * -1	each fragment will be the same random value in length
1436119644Ssilby * -2	each fragment's length will be entirely random
1437119644Ssilby * (Random values range from 1 to 256)
1438119644Ssilby */
1439119644Ssilbystruct mbuf *
1440119644Ssilbym_fragment(struct mbuf *m0, int how, int length)
1441119644Ssilby{
1442125472Ssilby	struct mbuf *m_new = NULL, *m_final = NULL;
1443125472Ssilby	int progress = 0;
1444119644Ssilby
1445119644Ssilby	if (!(m0->m_flags & M_PKTHDR))
1446119644Ssilby		return (m0);
1447266876Sglebius
1448119644Ssilby	if ((length == 0) || (length < -2))
1449119644Ssilby		return (m0);
1450119644Ssilby
1451119644Ssilby	m_fixhdr(m0); /* Needed sanity check */
1452119644Ssilby
1453119644Ssilby	m_final = m_getcl(how, MT_DATA, M_PKTHDR);
1454119644Ssilby
1455119644Ssilby	if (m_final == NULL)
1456119644Ssilby		goto nospace;
1457119644Ssilby
1458123823Ssilby	if (m_dup_pkthdr(m_final, m0, how) == 0)
1459119644Ssilby		goto nospace;
1460119644Ssilby
1461119644Ssilby	m_new = m_final;
1462119644Ssilby
1463119644Ssilby	if (length == -1)
1464119644Ssilby		length = 1 + (arc4random() & 255);
1465119644Ssilby
1466119644Ssilby	while (progress < m0->m_pkthdr.len) {
1467119644Ssilby		int fraglen;
1468119644Ssilby
1469119644Ssilby		if (length > 0)
1470119644Ssilby			fraglen = length;
1471119644Ssilby		else
1472119644Ssilby			fraglen = 1 + (arc4random() & 255);
1473119644Ssilby		if (fraglen > m0->m_pkthdr.len - progress)
1474119644Ssilby			fraglen = m0->m_pkthdr.len - progress;
1475119644Ssilby
1476119644Ssilby		if (fraglen > MCLBYTES)
1477119644Ssilby			fraglen = MCLBYTES;
1478119644Ssilby
1479119644Ssilby		if (m_new == NULL) {
1480119644Ssilby			m_new = m_getcl(how, MT_DATA, 0);
1481119644Ssilby			if (m_new == NULL)
1482119644Ssilby				goto nospace;
1483119644Ssilby		}
1484119644Ssilby
1485119644Ssilby		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
1486119644Ssilby		progress += fraglen;
1487119644Ssilby		m_new->m_len = fraglen;
1488119644Ssilby		if (m_new != m_final)
1489119644Ssilby			m_cat(m_final, m_new);
1490119644Ssilby		m_new = NULL;
1491119644Ssilby	}
1492119644Ssilby	m_freem(m0);
1493119644Ssilby	m0 = m_final;
1494119644Ssilby	return (m0);
1495119644Ssilbynospace:
1496119644Ssilby	if (m_final)
1497119644Ssilby		m_freem(m_final);
1498119644Ssilby	/* Return the original chain on failure */
1499119644Ssilby	return (m0);
1500119644Ssilby}
1501119644Ssilby
1502119644Ssilby#endif
1503125296Ssilby
1504163915Sandre/*
1505163915Sandre * Copy the contents of uio into a properly sized mbuf chain.
1506163915Sandre */
1507125296Ssilbystruct mbuf *
1508163915Sandrem_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
1509125296Ssilby{
1510163915Sandre	struct mbuf *m, *mb;
1511231949Skib	int error, length;
1512231949Skib	ssize_t total;
1513163915Sandre	int progress = 0;
1514125296Ssilby
1515163915Sandre	/*
1516163915Sandre	 * len can be zero or an arbitrary large value bound by
1517163915Sandre	 * the total data supplied by the uio.
1518163915Sandre	 */
1519125296Ssilby	if (len > 0)
1520125296Ssilby		total = min(uio->uio_resid, len);
1521125296Ssilby	else
1522125296Ssilby		total = uio->uio_resid;
1523163915Sandre
1524163915Sandre	/*
1525163915Sandre	 * The smallest unit returned by m_getm2() is a single mbuf
1526182777Sthompsa	 * with pkthdr.  We can't align past it.
1527163915Sandre	 */
1528145883Semax	if (align >= MHLEN)
1529163915Sandre		return (NULL);
1530163915Sandre
1531166171Sandre	/*
1532166171Sandre	 * Give us the full allocation or nothing.
1533166171Sandre	 * If len is zero return the smallest empty mbuf.
1534166171Sandre	 */
1535166171Sandre	m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags);
1536163915Sandre	if (m == NULL)
1537163915Sandre		return (NULL);
1538163915Sandre	m->m_data += align;
1539163915Sandre
1540163915Sandre	/* Fill all mbufs with uio data and update header information. */
1541163915Sandre	for (mb = m; mb != NULL; mb = mb->m_next) {
1542163915Sandre		length = min(M_TRAILINGSPACE(mb), total - progress);
1543163915Sandre
1544163915Sandre		error = uiomove(mtod(mb, void *), length, uio);
1545163915Sandre		if (error) {
1546163915Sandre			m_freem(m);
1547163915Sandre			return (NULL);
1548125296Ssilby		}
1549163915Sandre
1550163915Sandre		mb->m_len = length;
1551125296Ssilby		progress += length;
1552163915Sandre		if (flags & M_PKTHDR)
1553163915Sandre			m->m_pkthdr.len += length;
1554125296Ssilby	}
1555163915Sandre	KASSERT(progress == total, ("%s: progress != total", __func__));
1556163915Sandre
1557163915Sandre	return (m);
1558125296Ssilby}
1559148552Ssam
1560148552Ssam/*
1561194667Sandre * Copy an mbuf chain into a uio limited by len if set.
1562194667Sandre */
1563194667Sandreint
1564194667Sandrem_mbuftouio(struct uio *uio, struct mbuf *m, int len)
1565194667Sandre{
1566194667Sandre	int error, length, total;
1567194667Sandre	int progress = 0;
1568194667Sandre
1569194667Sandre	if (len > 0)
1570194667Sandre		total = min(uio->uio_resid, len);
1571194667Sandre	else
1572194667Sandre		total = uio->uio_resid;
1573194667Sandre
1574194667Sandre	/* Fill the uio with data from the mbufs. */
1575194667Sandre	for (; m != NULL; m = m->m_next) {
1576194667Sandre		length = min(m->m_len, total - progress);
1577194667Sandre
1578194667Sandre		error = uiomove(mtod(m, void *), length, uio);
1579194667Sandre		if (error)
1580194667Sandre			return (error);
1581194667Sandre
1582194667Sandre		progress += length;
1583194667Sandre	}
1584194667Sandre
1585194667Sandre	return (0);
1586194667Sandre}
1587194667Sandre
1588194667Sandre/*
1589156756Ssam * Create a writable copy of the mbuf chain.  While doing this
1590156756Ssam * we compact the chain with a goal of producing a chain with
1591156756Ssam * at most two mbufs.  The second mbuf in this chain is likely
1592156756Ssam * to be a cluster.  The primary purpose of this work is to create
1593156756Ssam * a writable packet for encryption, compression, etc.  The
1594156756Ssam * secondary goal is to linearize the data so the data can be
1595156756Ssam * passed to crypto hardware in the most efficient manner possible.
1596156756Ssam */
1597156756Ssamstruct mbuf *
1598156756Ssamm_unshare(struct mbuf *m0, int how)
1599156756Ssam{
1600156756Ssam	struct mbuf *m, *mprev;
1601156756Ssam	struct mbuf *n, *mfirst, *mlast;
1602156756Ssam	int len, off;
1603156756Ssam
1604156756Ssam	mprev = NULL;
1605156756Ssam	for (m = m0; m != NULL; m = mprev->m_next) {
1606156756Ssam		/*
1607156756Ssam		 * Regular mbufs are ignored unless there's a cluster
1608156756Ssam		 * in front of it that we can use to coalesce.  We do
1609156756Ssam		 * the latter mainly so later clusters can be coalesced
1610156756Ssam		 * also w/o having to handle them specially (i.e. convert
1611156756Ssam		 * mbuf+cluster -> cluster).  This optimization is heavily
1612156756Ssam		 * influenced by the assumption that we're running over
1613156756Ssam		 * Ethernet where MCLBYTES is large enough that the max
1614156756Ssam		 * packet size will permit lots of coalescing into a
1615156756Ssam		 * single cluster.  This in turn permits efficient
1616156756Ssam		 * crypto operations, especially when using hardware.
1617156756Ssam		 */
1618156756Ssam		if ((m->m_flags & M_EXT) == 0) {
1619156756Ssam			if (mprev && (mprev->m_flags & M_EXT) &&
1620156756Ssam			    m->m_len <= M_TRAILINGSPACE(mprev)) {
1621156756Ssam				/* XXX: this ignores mbuf types */
1622156756Ssam				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
1623266876Sglebius				    mtod(m, caddr_t), m->m_len);
1624156756Ssam				mprev->m_len += m->m_len;
1625156756Ssam				mprev->m_next = m->m_next;	/* unlink from chain */
1626156756Ssam				m_free(m);			/* reclaim mbuf */
1627156756Ssam#if 0
1628156756Ssam				newipsecstat.ips_mbcoalesced++;
1629156756Ssam#endif
1630156756Ssam			} else {
1631156756Ssam				mprev = m;
1632156756Ssam			}
1633156756Ssam			continue;
1634156756Ssam		}
1635156756Ssam		/*
1636156756Ssam		 * Writable mbufs are left alone (for now).
1637156756Ssam		 */
1638156756Ssam		if (M_WRITABLE(m)) {
1639156756Ssam			mprev = m;
1640156756Ssam			continue;
1641156756Ssam		}
1642156756Ssam
1643156756Ssam		/*
1644156756Ssam		 * Not writable, replace with a copy or coalesce with
1645156756Ssam		 * the previous mbuf if possible (since we have to copy
1646156756Ssam		 * it anyway, we try to reduce the number of mbufs and
1647156756Ssam		 * clusters so that future work is easier).
1648156756Ssam		 */
1649156756Ssam		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
1650156756Ssam		/* NB: we only coalesce into a cluster or larger */
1651156756Ssam		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
1652156756Ssam		    m->m_len <= M_TRAILINGSPACE(mprev)) {
1653156756Ssam			/* XXX: this ignores mbuf types */
1654156756Ssam			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
1655266876Sglebius			    mtod(m, caddr_t), m->m_len);
1656156756Ssam			mprev->m_len += m->m_len;
1657156756Ssam			mprev->m_next = m->m_next;	/* unlink from chain */
1658156756Ssam			m_free(m);			/* reclaim mbuf */
1659156756Ssam#if 0
1660156756Ssam			newipsecstat.ips_clcoalesced++;
1661156756Ssam#endif
1662156756Ssam			continue;
1663156756Ssam		}
1664156756Ssam
1665156756Ssam		/*
1666248371Sglebius		 * Allocate new space to hold the copy and copy the data.
1667248371Sglebius		 * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by
1668248371Sglebius		 * splitting them into clusters.  We could just malloc a
1669248371Sglebius		 * buffer and make it external but too many device drivers
1670248371Sglebius		 * don't know how to break up the non-contiguous memory when
1671248371Sglebius		 * doing DMA.
1672156756Ssam		 */
1673248371Sglebius		n = m_getcl(how, m->m_type, m->m_flags);
1674248371Sglebius		if (n == NULL) {
1675248371Sglebius			m_freem(m0);
1676248371Sglebius			return (NULL);
1677156756Ssam		}
1678288990Sglebius		if (m->m_flags & M_PKTHDR) {
1679288990Sglebius			KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR",
1680288990Sglebius			    __func__, m0, m));
1681288990Sglebius			m_move_pkthdr(n, m);
1682288990Sglebius		}
1683156756Ssam		len = m->m_len;
1684156756Ssam		off = 0;
1685156756Ssam		mfirst = n;
1686156756Ssam		mlast = NULL;
1687156756Ssam		for (;;) {
1688156756Ssam			int cc = min(len, MCLBYTES);
1689156756Ssam			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
1690156756Ssam			n->m_len = cc;
1691156756Ssam			if (mlast != NULL)
1692156756Ssam				mlast->m_next = n;
1693266876Sglebius			mlast = n;
1694156756Ssam#if 0
1695156756Ssam			newipsecstat.ips_clcopied++;
1696156756Ssam#endif
1697156756Ssam
1698156756Ssam			len -= cc;
1699156756Ssam			if (len <= 0)
1700156756Ssam				break;
1701156756Ssam			off += cc;
1702156756Ssam
1703156756Ssam			n = m_getcl(how, m->m_type, m->m_flags);
1704156756Ssam			if (n == NULL) {
1705156756Ssam				m_freem(mfirst);
1706156756Ssam				m_freem(m0);
1707156756Ssam				return (NULL);
1708156756Ssam			}
1709156756Ssam		}
1710266876Sglebius		n->m_next = m->m_next;
1711156756Ssam		if (mprev == NULL)
1712156756Ssam			m0 = mfirst;		/* new head of chain */
1713156756Ssam		else
1714156756Ssam			mprev->m_next = mfirst;	/* replace old mbuf */
1715156756Ssam		m_free(m);			/* release old mbuf */
1716156756Ssam		mprev = mfirst;
1717156756Ssam	}
1718156756Ssam	return (m0);
1719156756Ssam}
1720178674Sjulian
1721178674Sjulian#ifdef MBUF_PROFILING
1722178674Sjulian
/*
 * Number of histogram buckets per statistic.  Don't just change this, as
 * things may overflow: m_profile() indexes "segments" by a clamped segment
 * count and "used"/"wasted" by fls() of a clamped byte count.
 */
#define	MP_BUCKETS	32

/* Histogram counters gathered by m_profile(). */
struct mbufprofile {
	uintmax_t	wasted[MP_BUCKETS];
	uintmax_t	used[MP_BUCKETS];
	uintmax_t	segments[MP_BUCKETS];
};

struct mbufprofile mbprof;

/*
 * Sizing of the text buffer filled by mbprof_textify().  Work out the
 * maximum space needed and add a bit of spare space too.
 */
#define	MP_MAXDIGITS	21	/* character budget for one printed counter */
#define	MP_NUMSPERLINE	16	/* counters printed on one line */
#define	MP_NUMLINES	6	/* lines of counters, at most */
#define	MP_EXTRABYTES	64	/* > strlen("used:\nwasted:\nsegments:\n") */
#define	MP_MAXLINE	((MP_MAXDIGITS + 1) * MP_NUMSPERLINE)
#define	MP_BUFSIZE	((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES)

char mbprofbuf[MP_BUFSIZE];
1739178674Sjulian
1740178674Sjulianvoid
1741178674Sjulianm_profile(struct mbuf *m)
1742178674Sjulian{
1743178674Sjulian	int segments = 0;
1744178674Sjulian	int used = 0;
1745178674Sjulian	int wasted = 0;
1746266876Sglebius
1747178674Sjulian	while (m) {
1748178674Sjulian		segments++;
1749178674Sjulian		used += m->m_len;
1750178674Sjulian		if (m->m_flags & M_EXT) {
1751178674Sjulian			wasted += MHLEN - sizeof(m->m_ext) +
1752178674Sjulian			    m->m_ext.ext_size - m->m_len;
1753178674Sjulian		} else {
1754178674Sjulian			if (m->m_flags & M_PKTHDR)
1755178674Sjulian				wasted += MHLEN - m->m_len;
1756178674Sjulian			else
1757178674Sjulian				wasted += MLEN - m->m_len;
1758178674Sjulian		}
1759178674Sjulian		m = m->m_next;
1760178674Sjulian	}
1761178674Sjulian	/* be paranoid.. it helps */
1762178674Sjulian	if (segments > MP_BUCKETS - 1)
1763178674Sjulian		segments = MP_BUCKETS - 1;
1764178674Sjulian	if (used > 100000)
1765178674Sjulian		used = 100000;
1766178674Sjulian	if (wasted > 100000)
1767178674Sjulian		wasted = 100000;
1768178674Sjulian	/* store in the appropriate bucket */
1769178674Sjulian	/* don't bother locking. if it's slightly off, so what? */
1770178674Sjulian	mbprof.segments[segments]++;
1771178674Sjulian	mbprof.used[fls(used)]++;
1772178674Sjulian	mbprof.wasted[fls(wasted)]++;
1773178674Sjulian}
1774178674Sjulian
1775178674Sjulianstatic void
1776178674Sjulianmbprof_textify(void)
1777178674Sjulian{
1778178674Sjulian	int offset;
1779178674Sjulian	char *c;
1780209390Sed	uint64_t *p;
1781178674Sjulian
1782178674Sjulian	p = &mbprof.wasted[0];
1783178674Sjulian	c = mbprofbuf;
1784266876Sglebius	offset = snprintf(c, MP_MAXLINE + 10,
1785178674Sjulian	    "wasted:\n"
1786178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1787178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1788178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1789178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1790178674Sjulian#ifdef BIG_ARRAY
1791178674Sjulian	p = &mbprof.wasted[16];
1792178674Sjulian	c += offset;
1793266876Sglebius	offset = snprintf(c, MP_MAXLINE,
1794178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1795178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1796178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1797178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1798178674Sjulian#endif
1799178674Sjulian	p = &mbprof.used[0];
1800178674Sjulian	c += offset;
1801266876Sglebius	offset = snprintf(c, MP_MAXLINE + 10,
1802178674Sjulian	    "used:\n"
1803178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1804178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1805178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1806178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1807178674Sjulian#ifdef BIG_ARRAY
1808178674Sjulian	p = &mbprof.used[16];
1809178674Sjulian	c += offset;
1810266876Sglebius	offset = snprintf(c, MP_MAXLINE,
1811178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1812178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1813178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1814178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1815178674Sjulian#endif
1816178674Sjulian	p = &mbprof.segments[0];
1817178674Sjulian	c += offset;
1818266876Sglebius	offset = snprintf(c, MP_MAXLINE + 10,
1819178674Sjulian	    "segments:\n"
1820178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1821178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1822178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1823178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1824178674Sjulian#ifdef BIG_ARRAY
1825178674Sjulian	p = &mbprof.segments[16];
1826178674Sjulian	c += offset;
1827266876Sglebius	offset = snprintf(c, MP_MAXLINE,
1828178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1829178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %jju",
1830178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1831178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1832178674Sjulian#endif
1833178674Sjulian}
1834178674Sjulian
1835178674Sjulianstatic int
1836178674Sjulianmbprof_handler(SYSCTL_HANDLER_ARGS)
1837178674Sjulian{
1838178674Sjulian	int error;
1839178674Sjulian
1840178674Sjulian	mbprof_textify();
1841178674Sjulian	error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1);
1842178674Sjulian	return (error);
1843178674Sjulian}
1844178674Sjulian
1845178674Sjulianstatic int
1846178674Sjulianmbprof_clr_handler(SYSCTL_HANDLER_ARGS)
1847178674Sjulian{
1848178674Sjulian	int clear, error;
1849266876Sglebius
1850178674Sjulian	clear = 0;
1851178674Sjulian	error = sysctl_handle_int(oidp, &clear, 0, req);
1852178674Sjulian	if (error || !req->newptr)
1853178674Sjulian		return (error);
1854266876Sglebius
1855178674Sjulian	if (clear) {
1856178674Sjulian		bzero(&mbprof, sizeof(mbprof));
1857178674Sjulian	}
1858266876Sglebius
1859178674Sjulian	return (error);
1860178674Sjulian}
1861178674Sjulian
1862178674Sjulian
1863178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD,
1864178674Sjulian	    NULL, 0, mbprof_handler, "A", "mbuf profiling statistics");
1865178674Sjulian
1866178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW,
1867178674Sjulian	    NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics");
1868178674Sjulian#endif
1869178674Sjulian
1870