/*-
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/uipc_mbuf.c 364163 2020-08-12 12:11:44Z ae $");

#include "opt_param.h"
#include "opt_mbuf_stress_test.h"
#include "opt_mbuf_profiling.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>
#include <sys/sdt.h>

SDT_PROBE_DEFINE5_XLATE(sdt, , , m__init,
    "struct mbuf *", "mbufinfo_t *",
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "uint32_t", "uint32_t",
    "uint32_t", "uint32_t");

SDT_PROBE_DEFINE3_XLATE(sdt, , , m__gethdr,
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE3_XLATE(sdt, , , m__get,
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE4_XLATE(sdt, , , m__getcl,
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "uint32_t", "uint32_t",
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE5_XLATE(sdt, , , m__getjcl,
    "uint32_t", "uint32_t",
    "uint16_t", "uint16_t",
    "uint32_t", "uint32_t",
    "uint32_t", "uint32_t",
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE3_XLATE(sdt, , , m__clget,
    "struct mbuf *", "mbufinfo_t *",
    "uint32_t", "uint32_t",
    "uint32_t", "uint32_t");

SDT_PROBE_DEFINE4_XLATE(sdt, , , m__cljget,
    "struct mbuf *", "mbufinfo_t *",
    "uint32_t", "uint32_t",
    "uint32_t", "uint32_t",
    "void*", "void*");

SDT_PROBE_DEFINE(sdt, , , m__cljset);

SDT_PROBE_DEFINE1_XLATE(sdt, , , m__free,
    "struct mbuf *", "mbufinfo_t *");

SDT_PROBE_DEFINE1_XLATE(sdt, , , m__freem,
    "struct mbuf *", "mbufinfo_t *");

#include <security/mac/mac_framework.h>

int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
#ifdef MBUF_STRESS_TEST
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
int	m_defragrandomfailures;
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD,
	   &max_linkhdr, 0, "Size of largest link layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD,
	   &max_protohdr, 0, "Size of largest protocol layer header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD,
	   &max_hdr, 0, "Size of largest link plus protocol header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD,
	   &max_datalen, 0, "Minimum space left in mbuf after max_hdr");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif

/*
 * Ensure the correct size of various mbuf parameters.  It could be off due
 * to compiler-induced padding and alignment artifacts.
 */
CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN);
CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN);

/*
 * mbuf data storage should be 64-bit aligned regardless of architectural
 * pointer size; check this is the case with and without a packet header.
 */
CTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0);
CTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0);

/*
 * While the specific values here don't matter too much (i.e., +/- a few
 * words), we do want to ensure that changes to these values are carefully
 * reasoned about and properly documented.  This is especially the case as
 * network-protocol and device-driver modules encode these layouts, and must
 * be recompiled if the structures change.  Check these values at compile time
 * against the ones documented in comments in mbuf.h.
 *
 * NB: Possibly they should be documented there via #define's and not just
 * comments.
 */
#if defined(__LP64__)
CTASSERT(offsetof(struct mbuf, m_dat) == 32);
CTASSERT(sizeof(struct pkthdr) == 56);
CTASSERT(sizeof(struct m_ext) == 48);
#else
CTASSERT(offsetof(struct mbuf, m_dat) == 24);
CTASSERT(sizeof(struct pkthdr) == 48);
CTASSERT(sizeof(struct m_ext) == 28);
#endif

/*
 * Assert that the queue(3) macros produce code of the same size as an old
 * plain pointer does.
 */
#ifdef INVARIANTS
static struct mbuf __used m_assertbuf;
CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next));
CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next));
CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt));
CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt));
#endif

/*
 * Attach the cluster from *m to *n, set up m_ext in *n
 * and bump the refcount of the cluster.
 */
void
mb_dupcl(struct mbuf *n, struct mbuf *m)
{
	volatile u_int *refcnt;

	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
	KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));

	n->m_ext = m->m_ext;
	n->m_flags |= M_EXT;
	n->m_flags |= m->m_flags & M_RDONLY;

	/* See if this is the mbuf that holds the embedded refcount. */
	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
		refcnt = n->m_ext.ext_cnt = &m->m_ext.ext_count;
		n->m_ext.ext_flags &= ~EXT_FLAG_EMBREF;
	} else {
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		refcnt = m->m_ext.ext_cnt;
	}

	if (*refcnt == 1)
		*refcnt += 1;
	else
		atomic_add_int(refcnt, 1);
}

void
m_demote_pkthdr(struct mbuf *m)
{

	M_ASSERTPKTHDR(m);

	m_tag_delete_chain(m, NULL);
	m->m_flags &= ~M_PKTHDR;
	bzero(&m->m_pkthdr, sizeof(struct pkthdr));
}

/*
 * Clean up mbuf (chain) from any tags and packet headers.
 * If "all" is set then the first mbuf in the chain will be
 * cleaned too.
 */
void
m_demote(struct mbuf *m0, int all, int flags)
{
	struct mbuf *m;

	for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
		KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p",
		    __func__, m, m0));
		if (m->m_flags & M_PKTHDR)
			m_demote_pkthdr(m);
		m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
	}
}

/*
 * Sanity checks on an mbuf (chain) for use in KASSERT() and general
 * debugging.
 * Returns 1 when all tests pass; on a failed test it reports the problem
 * via M_SANITY_ACTION (panic or printf).  The 'sanitize' argument: 0 to
 * run M_SANITY_ACTION on a failure, 1 to garble the offending field so
 * that the problem blows up later.
 */
int
m_sanity(struct mbuf *m0, int sanitize)
{
	struct mbuf *m;
	caddr_t a, b;
	int pktlen = 0;

#ifdef INVARIANTS
#define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m)
#else
#define	M_SANITY_ACTION(s)	printf("mbuf %p: " s, m)
#endif

	for (m = m0; m != NULL; m = m->m_next) {
		/*
		 * Basic pointer checks.  If any of these fails then some
		 * unrelated kernel memory before or after us is trashed.
		 * No way to recover from that.
		 */
		a = M_START(m);
		b = a + M_SIZE(m);
		if ((caddr_t)m->m_data < a)
			M_SANITY_ACTION("m_data outside mbuf data range left");
		if ((caddr_t)m->m_data > b)
			M_SANITY_ACTION("m_data outside mbuf data range right");
		if ((caddr_t)m->m_data + m->m_len > b)
			M_SANITY_ACTION("m_data + m_len exceeds mbuf space");

		/* m->m_nextpkt may only be set on first mbuf in chain. */
		if (m != m0 && m->m_nextpkt != NULL) {
			if (sanitize) {
				m_freem(m->m_nextpkt);
				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
			} else
				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
		}

		/* packet length (not mbuf length!) calculation */
		if (m0->m_flags & M_PKTHDR)
			pktlen += m->m_len;

		/* m_tags may only be attached to first mbuf in chain. */
		if (m != m0 && m->m_flags & M_PKTHDR &&
		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
			if (sanitize) {
				m_tag_delete_chain(m, NULL);
				/* put in 0xDEADC0DE perhaps? */
			} else
				M_SANITY_ACTION("m_tags on in-chain mbuf");
		}

		/* M_PKTHDR may only be set on first mbuf in chain */
		if (m != m0 && m->m_flags & M_PKTHDR) {
			if (sanitize) {
				bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
				m->m_flags &= ~M_PKTHDR;
				/* put in 0xDEADCODE and leave hdr flag in */
			} else
				M_SANITY_ACTION("M_PKTHDR on in-chain mbuf");
		}
	}
	m = m0;
	if (pktlen && pktlen != m->m_pkthdr.len) {
		if (sanitize)
			m->m_pkthdr.len = 0;
		else
			M_SANITY_ACTION("m_pkthdr.len != mbuf chain length");
	}
	return 1;

#undef	M_SANITY_ACTION
}
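
/*
 * Illustrative sketch (not part of the original source): m_sanity() is
 * typically wrapped in an assertion while debugging a protocol or driver
 * path, e.g.:
 *
 *	KASSERT(m_sanity(m, 0), ("%s: corrupt mbuf chain %p", __func__, m));
 *
 * With INVARIANTS enabled a failed check panics inside m_sanity() itself,
 * so the KASSERT() mostly documents intent.
 */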

/*
 * Non-inlined part of m_init().
 */
int
m_pkthdr_init(struct mbuf *m, int how)
{
#ifdef MAC
	int error;
#endif
	m->m_data = m->m_pktdat;
	bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
#ifdef MAC
	/* If the label init fails, fail the alloc */
	error = mac_mbuf_init(m, how);
	if (error)
		return (error);
#endif

	return (0);
}

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
{

#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with m_gethdr(). Many users
	 * (e.g. m_copy*, m_prepend) use m_get() and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip.  For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
	MBUF_CHECKSLEEP(how);
#ifdef MAC
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, how));
}
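
/*
 * Illustrative sketch (not part of the original source): a caller that
 * builds a fresh packet header mbuf and wants the metadata of an existing
 * packet (rcvif, copy flags, csum info and m_tags) might do:
 *
 *	struct mbuf *n;
 *
 *	n = m_gethdr(M_NOWAIT, MT_DATA);
 *	if (n == NULL)
 *		return (ENOBUFS);		// hypothetical error path
 *	if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
 *		m_free(n);
 *		return (ENOBUFS);
 *	}
 *
 * m_dup_pkthdr() returns 0 only when the deep copy of the tag chain fails.
 */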

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (m->m_flags & M_PKTHDR)
		mn = m_gethdr(how, m->m_type);
	else
		mn = m_get(how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		m_move_pkthdr(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < M_SIZE(m))
		M_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
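
/*
 * Illustrative sketch (not part of the original source): callers normally
 * reach m_prepend() through the M_PREPEND() macro, which only falls back
 * here when the first mbuf has no leading space, e.g. when pushing a
 * link-layer header in front of an IP packet:
 *
 *	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);	// chain was freed on failure
 *	eh = mtod(m, struct ether_header *);
 *
 * (struct ether_header and ETHER_HDR_LEN come from <net/ethernet.h>.)
 */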

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	MBUF_CHECKSLEEP(wait);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			n = m_gethdr(wait, m->m_type);
		else
			n = m_get(wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			mb_dupcl(n, m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}

	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
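
/*
 * Illustrative sketch (not part of the original source): a reference-counted
 * copy of a whole packet, e.g. for keeping data around for retransmission,
 * would look like:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);	// hypothetical error path
 *
 * Because clusters are shared rather than copied, the result must be
 * treated as read-only; use m_dup() when a writable copy is required.
 */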

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MBUF_CHECKSLEEP(how);
	n = m_get(how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		mb_dupcl(n, m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		o = m_get(how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			mb_dupcl(n, m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
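
/*
 * Illustrative sketch (not part of the original source): protocols often use
 * m_copydata() to linearize a header into a stack buffer without modifying
 * the chain, e.g.:
 *
 *	struct tcphdr th;
 *
 *	m_copydata(m, toff, sizeof(th), (caddr_t)&th);
 *
 * where 'toff' is a hypothetical offset of the TCP header within the
 * packet.  The caller must guarantee the chain holds 'off + len' bytes.
 */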

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(const struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	MBUF_CHECKSLEEP(how);
	/* Sanity check */
	if (m == NULL)
		return (NULL);
	M_ASSERTPKTHDR(m);

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		if (remain >= MINCLSIZE) {
			n = m_getcl(how, m->m_type, 0);
			nsize = MCLBYTES;
		} else {
			n = m_get(how, m->m_type);
			nsize = MLEN;
		}
		if (n == NULL)
			goto nospace;

		if (top == NULL) {		/* First one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how)) {
				m_free(n);
				goto nospace;
			}
			if ((n->m_flags & M_EXT) == 0)
				nsize = MHLEN;
			n->m_flags &= ~M_RDONLY;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (!M_WRITABLE(m) ||
		    M_TRAILINGSPACE(m) < n->m_len) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

/*
 * Concatenate two pkthdr mbuf chains.
 */
void
m_catpkt(struct mbuf *m, struct mbuf *n)
{

	M_ASSERTPKTHDR(m);
	M_ASSERTPKTHDR(n);

	m->m_pkthdr.len += n->m_pkthdr.len;
	m_demote(n, 1, 0);

	m_cat(m, n);
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}
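
/*
 * Illustrative sketch (not part of the original source): m_adj() trims whole
 * bytes from either end of a packet.  For example, stripping a link-layer
 * header from the front and a trailing FCS from the end:
 *
 *	m_adj(m, ETHER_HDR_LEN);	// positive: trim from the head
 *	m_adj(m, -ETHER_CRC_LEN);	// negative: trim from the tail
 *
 * The pkthdr length of the leading mbuf is updated accordingly.
 */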

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		m = m_get(M_NOWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		if (n->m_flags & M_PKTHDR)
			m_move_pkthdr(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	return (NULL);
}
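
/*
 * Illustrative sketch (not part of the original source): the canonical use
 * of m_pullup() is in protocol input paths that need a contiguous header
 * before casting with mtod(), e.g.:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;			// chain was freed by m_pullup()
 *	ip = mtod(m, struct ip *);
 *
 * Note the reassignment of 'm': on success a (possibly new) leading mbuf
 * is returned, and on failure the whole chain has already been freed.
 */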

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	m = m_get(M_NOWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	if (n->m_flags & M_PKTHDR)
		m_move_pkthdr(m, n);
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
 bad:
	m_freem(n);
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR && remain == 0) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_next = m->m_next;
		m->m_next = NULL;
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		return (n);
	} else if (m0->m_flags & M_PKTHDR) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			M_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			M_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		n = m_get(wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		mb_dupcl(n, m);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
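
/*
 * Illustrative sketch (not part of the original source): splitting a large
 * record into a packet of at most 'mss' bytes plus a remainder chain:
 *
 *	struct mbuf *rest;
 *
 *	rest = m_split(m, mss, M_NOWAIT);
 *	if (rest == NULL)
 *		return (ENOBUFS);	// on failure 'm' is left as it was
 *
 * After the call 'm' holds the first 'mss' bytes and 'rest' the remainder;
 * both carry a valid pkthdr when 'm' had one.  'mss' is a hypothetical
 * caller-chosen length.
 */
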
/*
 * Routine to copy from device local memory into mbufs.
 * Note that `off' argument is offset into first mbuf of target chain from
 * which to begin copying the data to.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	while (totlen > 0) {
		if (top == NULL) {	/* First one, must be PKTHDR */
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
				len = MCLBYTES;
			} else {
				m = m_gethdr(M_NOWAIT, MT_DATA);
				len = MHLEN;

				/* Place initial small packet/header at end of mbuf */
				if (m && totlen + off + max_linkhdr <= MHLEN) {
					m->m_data += max_linkhdr;
					len -= max_linkhdr;
				}
			}
			if (m == NULL)
				return NULL;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = totlen;
		} else {
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_NOWAIT, MT_DATA, 0);
				len = MCLBYTES;
			} else {
				m = m_get(M_NOWAIT, MT_DATA);
				len = MLEN;
			}
			if (m == NULL) {
				m_freem(top);
				return NULL;
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}
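
/*
 * Illustrative sketch (not part of the original source): a simple receive
 * path in a driver without DMA-able mbufs might copy a frame out of board
 * memory like this (ETHER_ALIGN keeps the IP header 32-bit aligned):
 *
 *	struct mbuf *m;
 *
 *	m = m_devget(rxbuf, framelen, ETHER_ALIGN, ifp, NULL);
 *	if (m == NULL)
 *		return;			// drop the frame, out of mbufs
 *	(*ifp->if_input)(ifp, m);
 *
 * 'rxbuf' and 'framelen' are hypothetical driver-provided values.
 */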

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get(M_NOWAIT, m->m_type);
			if (n == NULL)
				goto out;
			bzero(mtod(n, caddr_t), MLEN);
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		if (m->m_next == NULL && (len > m->m_len - off)) {
			m->m_len += min(len - (m->m_len - off),
			    M_TRAILINGSPACE(m));
		}
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_NOWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
/*
 * Append the specified data to the indicated mbuf chain.
 * Extend the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_NOWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, u_int), void *arg)
{
	u_int count;
	int rval;

	KASSERT(off >= 0, ("m_apply, negative off %d", off));
	KASSERT(len >= 0, ("m_apply, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		count = min(m->m_len - off, len);
		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);
		len -= count;
		off = 0;
		m = m->m_next;
	}
	return (0);
}
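
/*
 * Illustrative sketch (not part of the original source): m_apply() walks the
 * chain without linearizing it, invoking the callback once per contiguous
 * region.  A hypothetical byte sum over part of a packet could look like:
 *
 *	static int
 *	sum_cb(void *arg, void *data, u_int len)
 *	{
 *		uint32_t *sum = arg;
 *		uint8_t *p = data;
 *
 *		while (len-- > 0)
 *			*sum += *p++;
 *		return (0);		// non-zero would abort the walk
 *	}
 *
 *	uint32_t sum = 0;
 *	(void)m_apply(m, off, len, sum_cb, &sum);
 */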

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search. */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;
			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data. */
					*off = m->m_len;
					return (m);
				}
				return (NULL);
			}
			m = m->m_next;
		}
	}
	return (NULL);
}

void
m_print(const struct mbuf *m, int maxlen)
{
	int len;
	int pdata;
	const struct mbuf *m2;

	if (m == NULL) {
		printf("mbuf: %p\n", m);
		return;
	}

	if (m->m_flags & M_PKTHDR)
		len = m->m_pkthdr.len;
	else
		len = -1;
	m2 = m;
	while (m2 != NULL && (len == -1 || len)) {
		pdata = m2->m_len;
		if (maxlen != -1 && pdata > maxlen)
			pdata = maxlen;
		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
		    m2->m_next, m2->m_flags, "\20\20freelist\17skipfw"
		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
		if (pdata)
			printf(", %*D\n", pdata, (u_char *)m2->m_data, "-");
		if (len != -1)
			len -= m2->m_len;
		m2 = m2->m_next;
	}
	if (len > 0)
		printf("%d bytes unaccounted for.\n", len);
	return;
}

u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}

u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed-in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a non-packet-header mbuf is passed in, the original
 * chain will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	MBUF_CHECKSLEEP(how);
	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
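
/*
 * Illustrative sketch (not part of the original source): drivers commonly
 * call m_defrag() in their transmit path when a chain has more segments
 * than their DMA engine supports, e.g.:
 *
 *	if (bus_dmamap_load_mbuf_sg(...) == EFBIG) {
 *		struct mbuf *n = m_defrag(m, M_NOWAIT);
 *		if (n == NULL) {
 *			m_freem(m);	// original chain is still ours
 *			return (ENOBUFS);
 *		}
 *		m = n;			// original was freed by m_defrag()
 *	}
 *
 * The bus_dma call above is abbreviated; only the error-handling pattern
 * around m_defrag() is the point here.
 */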

/*
 * Defragment an mbuf chain, returning at most maxfrags separate
 * mbufs+clusters.  If this is not possible NULL is returned and
 * the original mbuf chain is left in its present (potentially
 * modified) state.  We use two techniques: collapsing consecutive
 * mbufs and replacing consecutive mbufs by a cluster.
 *
 * NB: this should really be named m_defrag but that name is taken
 */
1352175414Ssamstruct mbuf *
1353175414Ssamm_collapse(struct mbuf *m0, int how, int maxfrags)
1354175414Ssam{
1355175414Ssam	struct mbuf *m, *n, *n2, **prev;
1356175414Ssam	u_int curfrags;
1357175414Ssam
1358175414Ssam	/*
1359175414Ssam	 * Calculate the current number of frags.
1360175414Ssam	 */
1361175414Ssam	curfrags = 0;
1362175414Ssam	for (m = m0; m != NULL; m = m->m_next)
1363175414Ssam		curfrags++;
1364175414Ssam	/*
1365175414Ssam	 * First, try to collapse mbufs.  Note that we always collapse
1366175414Ssam	 * towards the front so we don't need to deal with moving the
1367175414Ssam	 * pkthdr.  This may be suboptimal if the first mbuf has much
1368175414Ssam	 * less data than the following.
1369175414Ssam	 */
1370175414Ssam	m = m0;
1371175414Ssamagain:
1372175414Ssam	for (;;) {
1373175414Ssam		n = m->m_next;
1374175414Ssam		if (n == NULL)
1375175414Ssam			break;
1376242256Sandre		if (M_WRITABLE(m) &&
1377175414Ssam		    n->m_len < M_TRAILINGSPACE(m)) {
1378175414Ssam			bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
1379175414Ssam				n->m_len);
1380175414Ssam			m->m_len += n->m_len;
1381175414Ssam			m->m_next = n->m_next;
1382175414Ssam			m_free(n);
1383175414Ssam			if (--curfrags <= maxfrags)
1384175414Ssam				return m0;
1385175414Ssam		} else
1386175414Ssam			m = n;
1387175414Ssam	}
1388175414Ssam	KASSERT(maxfrags > 1,
1389175414Ssam		("maxfrags %u, but normal collapse failed", maxfrags));
1390175414Ssam	/*
1391175414Ssam	 * Collapse consecutive mbufs to a cluster.
1392175414Ssam	 */
1393175414Ssam	prev = &m0->m_next;		/* NB: not the first mbuf */
1394175414Ssam	while ((n = *prev) != NULL) {
1395175414Ssam		if ((n2 = n->m_next) != NULL &&
1396175414Ssam		    n->m_len + n2->m_len < MCLBYTES) {
1397175414Ssam			m = m_getcl(how, MT_DATA, 0);
1398175414Ssam			if (m == NULL)
1399175414Ssam				goto bad;
1400175414Ssam			bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
1401175414Ssam			bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
1402175414Ssam				n2->m_len);
1403175414Ssam			m->m_len = n->m_len + n2->m_len;
1404175414Ssam			m->m_next = n2->m_next;
1405175414Ssam			*prev = m;
1406175414Ssam			m_free(n);
1407175414Ssam			m_free(n2);
1408175414Ssam			if (--curfrags <= maxfrags)	/* +1 cl -2 mbufs */
1409175414Ssam				return m0;
1410175414Ssam			/*
1411175414Ssam			 * Still not there, try the normal collapse
1412175414Ssam			 * again before we allocate another cluster.
1413175414Ssam			 */
1414175414Ssam			goto again;
1415175414Ssam		}
1416175414Ssam		prev = &n->m_next;
1417175414Ssam	}
1418175414Ssam	/*
1419175414Ssam	 * No place where we can collapse to a cluster; punt.
1420175414Ssam	 * This can occur if, for example, you request 2 frags
1421175414Ssam	 * but the packet requires that both be clusters (we
1422175414Ssam	 * never reallocate the first mbuf to avoid moving the
1423175414Ssam	 * packet header).
1424175414Ssam	 */
1425175414Ssambad:
1426175414Ssam	return NULL;
1427175414Ssam}
1428175414Ssam
1429119644Ssilby#ifdef MBUF_STRESS_TEST
1430119644Ssilby
1431119644Ssilby/*
1432119644Ssilby * Fragment an mbuf chain.  There's no reason you'd ever want to do
1433119644Ssilby * this in normal usage, but it's great for stress testing various
1434119644Ssilby * mbuf consumers.
1435119644Ssilby *
1436119644Ssilby * If fragmentation is not possible, the original chain will be
1437119644Ssilby * returned.
1438119644Ssilby *
1439119644Ssilby * Possible length values:
1440119644Ssilby * 0	 no fragmentation will occur
1441119644Ssilby * > 0	each fragment will be of the specified length
1442119644Ssilby * -1	each fragment will be the same random value in length
1443119644Ssilby * -2	each fragment's length will be entirely random
1444119644Ssilby * (Random values range from 1 to 255)
1445119644Ssilby */
1446119644Ssilbystruct mbuf *
1447119644Ssilbym_fragment(struct mbuf *m0, int how, int length)
1448119644Ssilby{
1449331847Savos	struct mbuf *m_first, *m_last;
1450331847Savos	int divisor = 255, progress = 0, fraglen;
1451119644Ssilby
1452119644Ssilby	if (!(m0->m_flags & M_PKTHDR))
1453119644Ssilby		return (m0);
1454266876Sglebius
1455331847Savos	if (length == 0 || length < -2)
1456119644Ssilby		return (m0);
1457331847Savos	if (length > MCLBYTES)
1458331847Savos		length = MCLBYTES;
1459331847Savos	if (length < 0 && divisor > MCLBYTES)
1460331847Savos		divisor = MCLBYTES;
1461331847Savos	if (length == -1)
1462331847Savos		length = 1 + (arc4random() % divisor);
1463331847Savos	if (length > 0)
1464331847Savos		fraglen = length;
1465119644Ssilby
1466119644Ssilby	m_fixhdr(m0); /* Needed sanity check */
1467119644Ssilby
1468331847Savos	m_first = m_getcl(how, MT_DATA, M_PKTHDR);
1469331847Savos	if (m_first == NULL)
1470119644Ssilby		goto nospace;
1471119644Ssilby
1472331847Savos	if (m_dup_pkthdr(m_first, m0, how) == 0)
1473119644Ssilby		goto nospace;
1474119644Ssilby
1475331847Savos	m_last = m_first;
1476119644Ssilby
1477119644Ssilby	while (progress < m0->m_pkthdr.len) {
1478331847Savos		if (length == -2)
1479331847Savos			fraglen = 1 + (arc4random() % divisor);
1480119644Ssilby		if (fraglen > m0->m_pkthdr.len - progress)
1481119644Ssilby			fraglen = m0->m_pkthdr.len - progress;
1482119644Ssilby
1483331847Savos		if (progress != 0) {
1484331847Savos			struct mbuf *m_new = m_getcl(how, MT_DATA, 0);
1485119644Ssilby			if (m_new == NULL)
1486119644Ssilby				goto nospace;
1487331847Savos
1488331847Savos			m_last->m_next = m_new;
1489331847Savos			m_last = m_new;
1490119644Ssilby		}
1491119644Ssilby
1492331847Savos		m_copydata(m0, progress, fraglen, mtod(m_last, caddr_t));
1493119644Ssilby		progress += fraglen;
1494331847Savos		m_last->m_len = fraglen;
1495119644Ssilby	}
1496119644Ssilby	m_freem(m0);
1497331847Savos	m0 = m_first;
1498119644Ssilby	return (m0);
1499119644Ssilbynospace:
1500331847Savos	if (m_first)
1501331847Savos		m_freem(m_first);
1502119644Ssilby	/* Return the original chain on failure */
1503119644Ssilby	return (m0);
1504119644Ssilby}
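/*
 * Illustrative sketch (not compiled, hypothetical helper): a stress test
 * could shred every outgoing packet into randomly sized pieces before
 * handing it to the consumer under test.  Passing -2 selects fully random
 * fragment lengths as documented above; on allocation failure the original
 * chain comes back unchanged, so nothing is lost.
 */
#if 0
static struct mbuf *
example_stress_fragment(struct mbuf *m)
{

	return (m_fragment(m, M_NOWAIT, -2));
}
#endif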
1505119644Ssilby
1506119644Ssilby#endif
1507125296Ssilby
1508163915Sandre/*
1509163915Sandre * Copy the contents of uio into a properly sized mbuf chain.
1510163915Sandre */
1511125296Ssilbystruct mbuf *
1512163915Sandrem_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
1513125296Ssilby{
1514163915Sandre	struct mbuf *m, *mb;
1515231949Skib	int error, length;
1516231949Skib	ssize_t total;
1517163915Sandre	int progress = 0;
1518125296Ssilby
1519163915Sandre	/*
1520163915Sandre	 * len can be zero or an arbitrarily large value bounded by
1521163915Sandre	 * the total amount of data supplied by the uio.
1522163915Sandre	 */
1523125296Ssilby	if (len > 0)
1524125296Ssilby		total = min(uio->uio_resid, len);
1525125296Ssilby	else
1526125296Ssilby		total = uio->uio_resid;
1527163915Sandre
1528163915Sandre	/*
1529163915Sandre	 * The smallest unit returned by m_getm2() is a single mbuf
1530182777Sthompsa	 * with pkthdr.  We can't align past it.
1531163915Sandre	 */
1532145883Semax	if (align >= MHLEN)
1533163915Sandre		return (NULL);
1534163915Sandre
1535166171Sandre	/*
1536166171Sandre	 * Give us the full allocation or nothing.
1537166171Sandre	 * If len is zero return the smallest empty mbuf.
1538166171Sandre	 */
1539166171Sandre	m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags);
1540163915Sandre	if (m == NULL)
1541163915Sandre		return (NULL);
1542163915Sandre	m->m_data += align;
1543163915Sandre
1544163915Sandre	/* Fill all mbufs with uio data and update header information. */
1545163915Sandre	for (mb = m; mb != NULL; mb = mb->m_next) {
1546163915Sandre		length = min(M_TRAILINGSPACE(mb), total - progress);
1547163915Sandre
1548163915Sandre		error = uiomove(mtod(mb, void *), length, uio);
1549163915Sandre		if (error) {
1550163915Sandre			m_freem(m);
1551163915Sandre			return (NULL);
1552125296Ssilby		}
1553163915Sandre
1554163915Sandre		mb->m_len = length;
1555125296Ssilby		progress += length;
1556163915Sandre		if (flags & M_PKTHDR)
1557163915Sandre			m->m_pkthdr.len += length;
1558125296Ssilby	}
1559163915Sandre	KASSERT(progress == total, ("%s: progress != total", __func__));
1560163915Sandre
1561163915Sandre	return (m);
1562125296Ssilby}
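/*
 * Illustrative sketch (not compiled, hypothetical helper): a datagram send
 * path could copy a whole uio into a fresh chain, reserving "hdrspace"
 * bytes (which must be less than MHLEN) in front of the data for protocol
 * headers and asking for a packet header on the first mbuf.  Passing 0 as
 * the length means "everything the uio holds".
 */
#if 0
static struct mbuf *
example_uio_to_chain(struct uio *uio, int hdrspace)
{

	return (m_uiotombuf(uio, M_WAITOK, 0, hdrspace, M_PKTHDR));
}
#endif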
1563148552Ssam
1564148552Ssam/*
1565194667Sandre * Copy data from an mbuf chain into a uio, limited by len if set.
1566194667Sandre */
1567194667Sandreint
1568194667Sandrem_mbuftouio(struct uio *uio, struct mbuf *m, int len)
1569194667Sandre{
1570194667Sandre	int error, length, total;
1571194667Sandre	int progress = 0;
1572194667Sandre
1573194667Sandre	if (len > 0)
1574194667Sandre		total = min(uio->uio_resid, len);
1575194667Sandre	else
1576194667Sandre		total = uio->uio_resid;
1577194667Sandre
1578194667Sandre	/* Fill the uio with data from the mbufs. */
1579194667Sandre	for (; m != NULL; m = m->m_next) {
1580194667Sandre		length = min(m->m_len, total - progress);
1581194667Sandre
1582194667Sandre		error = uiomove(mtod(m, void *), length, uio);
1583194667Sandre		if (error)
1584194667Sandre			return (error);
1585194667Sandre
1586194667Sandre		progress += length;
1587194667Sandre	}
1588194667Sandre
1589194667Sandre	return (0);
1590194667Sandre}
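/*
 * Illustrative sketch (not compiled, hypothetical helper): a receive path
 * that has already dequeued a chain could hand its payload to userland in
 * one call.  A len of 0 copies as much as the uio has room for; the chain
 * itself is not consumed, so it is freed here once the copy is done.
 */
#if 0
static int
example_chain_to_uio(struct mbuf *m, struct uio *uio)
{
	int error;

	error = m_mbuftouio(uio, m, 0);
	m_freem(m);
	return (error);
}
#endif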
1591194667Sandre
1592194667Sandre/*
1593156756Ssam * Create a writable copy of the mbuf chain.  While doing this
1594156756Ssam * we compact the chain with a goal of producing a chain with
1595156756Ssam * at most two mbufs.  The second mbuf in this chain is likely
1596156756Ssam * to be a cluster.  The primary purpose of this work is to create
1597156756Ssam * a writable packet for encryption, compression, etc.  The
1598156756Ssam * secondary goal is to linearize the data so the data can be
1599156756Ssam * passed to crypto hardware in the most efficient manner possible.
1600156756Ssam */
1601156756Ssamstruct mbuf *
1602156756Ssamm_unshare(struct mbuf *m0, int how)
1603156756Ssam{
1604156756Ssam	struct mbuf *m, *mprev;
1605156756Ssam	struct mbuf *n, *mfirst, *mlast;
1606156756Ssam	int len, off;
1607156756Ssam
1608156756Ssam	mprev = NULL;
1609156756Ssam	for (m = m0; m != NULL; m = mprev->m_next) {
1610156756Ssam		/*
1611156756Ssam		 * A regular mbuf is ignored unless there is a cluster
1612156756Ssam		 * in front of it that we can use to coalesce.  We do
1613156756Ssam		 * the latter mainly so later clusters can be coalesced
1614156756Ssam		 * also w/o having to handle them specially (i.e. convert
1615156756Ssam		 * mbuf+cluster -> cluster).  This optimization is heavily
1616156756Ssam		 * influenced by the assumption that we're running over
1617156756Ssam		 * Ethernet where MCLBYTES is large enough that the max
1618156756Ssam		 * packet size will permit lots of coalescing into a
1619156756Ssam		 * single cluster.  This in turn permits efficient
1620156756Ssam		 * crypto operations, especially when using hardware.
1621156756Ssam		 */
1622156756Ssam		if ((m->m_flags & M_EXT) == 0) {
1623156756Ssam			if (mprev && (mprev->m_flags & M_EXT) &&
1624156756Ssam			    m->m_len <= M_TRAILINGSPACE(mprev)) {
1625156756Ssam				/* XXX: this ignores mbuf types */
1626156756Ssam				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
1627266876Sglebius				    mtod(m, caddr_t), m->m_len);
1628156756Ssam				mprev->m_len += m->m_len;
1629156756Ssam				mprev->m_next = m->m_next;	/* unlink from chain */
1630156756Ssam				m_free(m);			/* reclaim mbuf */
1631156756Ssam#if 0
1632156756Ssam				newipsecstat.ips_mbcoalesced++;
1633156756Ssam#endif
1634156756Ssam			} else {
1635156756Ssam				mprev = m;
1636156756Ssam			}
1637156756Ssam			continue;
1638156756Ssam		}
1639156756Ssam		/*
1640156756Ssam		 * Writable mbufs are left alone (for now).
1641156756Ssam		 */
1642156756Ssam		if (M_WRITABLE(m)) {
1643156756Ssam			mprev = m;
1644156756Ssam			continue;
1645156756Ssam		}
1646156756Ssam
1647156756Ssam		/*
1648156756Ssam		 * Not writable, replace with a copy or coalesce with
1649156756Ssam		 * the previous mbuf if possible (since we have to copy
1650156756Ssam		 * it anyway, we try to reduce the number of mbufs and
1651156756Ssam		 * clusters so that future work is easier).
1652156756Ssam		 */
1653156756Ssam		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
1654156756Ssam		/* NB: we only coalesce into a cluster or larger */
1655156756Ssam		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
1656156756Ssam		    m->m_len <= M_TRAILINGSPACE(mprev)) {
1657156756Ssam			/* XXX: this ignores mbuf types */
1658156756Ssam			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
1659266876Sglebius			    mtod(m, caddr_t), m->m_len);
1660156756Ssam			mprev->m_len += m->m_len;
1661156756Ssam			mprev->m_next = m->m_next;	/* unlink from chain */
1662156756Ssam			m_free(m);			/* reclaim mbuf */
1663156756Ssam#if 0
1664156756Ssam			newipsecstat.ips_clcoalesced++;
1665156756Ssam#endif
1666156756Ssam			continue;
1667156756Ssam		}
1668156756Ssam
1669156756Ssam		/*
1670248371Sglebius		 * Allocate new space to hold the copy and copy the data.
1671248371Sglebius		 * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by
1672248371Sglebius		 * splitting them into clusters.  We could just malloc a
1673248371Sglebius		 * buffer and make it external but too many device drivers
1674248371Sglebius		 * don't know how to break up the non-contiguous memory when
1675248371Sglebius		 * doing DMA.
1676156756Ssam		 */
1677297298Snp		n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS);
1678248371Sglebius		if (n == NULL) {
1679248371Sglebius			m_freem(m0);
1680248371Sglebius			return (NULL);
1681156756Ssam		}
1682288990Sglebius		if (m->m_flags & M_PKTHDR) {
1683288990Sglebius			KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR",
1684288990Sglebius			    __func__, m0, m));
1685288990Sglebius			m_move_pkthdr(n, m);
1686288990Sglebius		}
1687156756Ssam		len = m->m_len;
1688156756Ssam		off = 0;
1689156756Ssam		mfirst = n;
1690156756Ssam		mlast = NULL;
1691156756Ssam		for (;;) {
1692156756Ssam			int cc = min(len, MCLBYTES);
1693156756Ssam			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
1694156756Ssam			n->m_len = cc;
1695156756Ssam			if (mlast != NULL)
1696156756Ssam				mlast->m_next = n;
1697266876Sglebius			mlast = n;
1698156756Ssam#if 0
1699156756Ssam			newipsecstat.ips_clcopied++;
1700156756Ssam#endif
1701156756Ssam
1702156756Ssam			len -= cc;
1703156756Ssam			if (len <= 0)
1704156756Ssam				break;
1705156756Ssam			off += cc;
1706156756Ssam
1707297298Snp			n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS);
1708156756Ssam			if (n == NULL) {
1709156756Ssam				m_freem(mfirst);
1710156756Ssam				m_freem(m0);
1711156756Ssam				return (NULL);
1712156756Ssam			}
1713156756Ssam		}
1714266876Sglebius		n->m_next = m->m_next;
1715156756Ssam		if (mprev == NULL)
1716156756Ssam			m0 = mfirst;		/* new head of chain */
1717156756Ssam		else
1718156756Ssam			mprev->m_next = mfirst;	/* replace old mbuf */
1719156756Ssam		m_free(m);			/* release old mbuf */
1720156756Ssam		mprev = mfirst;
1721156756Ssam	}
1722156756Ssam	return (m0);
1723156756Ssam}
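/*
 * Illustrative sketch (not compiled, hypothetical helper): an IPsec-style
 * transform that has to modify packet data in place would first obtain a
 * writable, compacted chain.  Note the asymmetric ownership: on failure
 * m_unshare() has already freed the original chain, so the caller must not
 * touch it again.
 */
#if 0
static struct mbuf *
example_make_writable(struct mbuf *m)
{

	/* On success every mbuf in the returned chain is safe to write. */
	return (m_unshare(m, M_NOWAIT));
}
#endif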
1724178674Sjulian
1725178674Sjulian#ifdef MBUF_PROFILING
1726178674Sjulian
1727178674Sjulian#define MP_BUCKETS 32 /* Don't just change this as things may overflow. */
1728178674Sjulianstruct mbufprofile {
1729178700Sjulian	uintmax_t wasted[MP_BUCKETS];
1730178700Sjulian	uintmax_t used[MP_BUCKETS];
1731178700Sjulian	uintmax_t segments[MP_BUCKETS];
1732178674Sjulian} mbprof;
1733178674Sjulian
1734178674Sjulian#define MP_MAXDIGITS 21	/* strlen("18446744073709551615") == 20, plus one to spare */
1735178674Sjulian#define MP_NUMLINES 6
1736178674Sjulian#define MP_NUMSPERLINE 16
1737178674Sjulian#define MP_EXTRABYTES 64	/* > strlen("used:\nwasted:\nsegments:\n") */
1738178674Sjulian/* work out max space needed and add a bit of spare space too */
1739178674Sjulian#define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE)
1740178674Sjulian#define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES)
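/* e.g. MP_MAXLINE = (21 + 1) * 16 = 352 and MP_BUFSIZE = 352 * 6 + 1 + 64 = 2177 */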
1741178674Sjulian
1742178674Sjulianchar mbprofbuf[MP_BUFSIZE];
1743178674Sjulian
1744178674Sjulianvoid
1745178674Sjulianm_profile(struct mbuf *m)
1746178674Sjulian{
1747178674Sjulian	int segments = 0;
1748178674Sjulian	int used = 0;
1749178674Sjulian	int wasted = 0;
1750266876Sglebius
1751178674Sjulian	while (m) {
1752178674Sjulian		segments++;
1753178674Sjulian		used += m->m_len;
1754178674Sjulian		if (m->m_flags & M_EXT) {
1755178674Sjulian			wasted += MHLEN - sizeof(m->m_ext) +
1756178674Sjulian			    m->m_ext.ext_size - m->m_len;
1757178674Sjulian		} else {
1758178674Sjulian			if (m->m_flags & M_PKTHDR)
1759178674Sjulian				wasted += MHLEN - m->m_len;
1760178674Sjulian			else
1761178674Sjulian				wasted += MLEN - m->m_len;
1762178674Sjulian		}
1763178674Sjulian		m = m->m_next;
1764178674Sjulian	}
1765178674Sjulian	/* be paranoid.. it helps */
1766178674Sjulian	if (segments > MP_BUCKETS - 1)
1767178674Sjulian		segments = MP_BUCKETS - 1;
1768178674Sjulian	if (used > 100000)
1769178674Sjulian		used = 100000;
1770178674Sjulian	if (wasted > 100000)
1771178674Sjulian		wasted = 100000;
1772178674Sjulian	/* store in the appropriate bucket */
1773178674Sjulian	/* don't bother locking. if it's slightly off, so what? */
1774178674Sjulian	mbprof.segments[segments]++;
1775178674Sjulian	mbprof.used[fls(used)]++;
1776178674Sjulian	mbprof.wasted[fls(wasted)]++;
1777178674Sjulian}
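/*
 * Illustrative sketch (not compiled, hypothetical call site): a driver's
 * transmit routine could feed every outgoing chain to the profiler.  Because
 * the "used" and "wasted" buckets are indexed with fls(), they are log2
 * sized: a chain carrying 1500 bytes of data lands in bucket fls(1500) == 11.
 */
#if 0
static void
example_profile_tx(struct mbuf *m)
{

	m_profile(m);
}
#endif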
1778178674Sjulian
1779178674Sjulianstatic void
1780178674Sjulianmbprof_textify(void)
1781178674Sjulian{
1782178674Sjulian	int offset;
1783178674Sjulian	char *c;
1784209390Sed	uint64_t *p;
1785178674Sjulian
1786178674Sjulian	p = &mbprof.wasted[0];
1787178674Sjulian	c = mbprofbuf;
1788266876Sglebius	offset = snprintf(c, MP_MAXLINE + 10,
1789178674Sjulian	    "wasted:\n"
1790178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1791178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1792178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1793178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1794178674Sjulian#ifdef BIG_ARRAY
1795178674Sjulian	p = &mbprof.wasted[16];
1796178674Sjulian	c += offset;
1797266876Sglebius	offset = snprintf(c, MP_MAXLINE,
1798178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1799178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1800178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1801178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1802178674Sjulian#endif
1803178674Sjulian	p = &mbprof.used[0];
1804178674Sjulian	c += offset;
1805266876Sglebius	offset = snprintf(c, MP_MAXLINE + 10,
1806178674Sjulian	    "used:\n"
1807178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1808178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1809178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1810178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1811178674Sjulian#ifdef BIG_ARRAY
1812178674Sjulian	p = &mbprof.used[16];
1813178674Sjulian	c += offset;
1814266876Sglebius	offset = snprintf(c, MP_MAXLINE,
1815178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1816178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1817178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1818178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1819178674Sjulian#endif
1820178674Sjulian	p = &mbprof.segments[0];
1821178674Sjulian	c += offset;
1822266876Sglebius	offset = snprintf(c, MP_MAXLINE + 10,
1823178674Sjulian	    "segments:\n"
1824178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1825178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1826178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1827178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1828178674Sjulian#ifdef BIG_ARRAY
1829178674Sjulian	p = &mbprof.segments[16];
1830178674Sjulian	c += offset;
1831266876Sglebius	offset = snprintf(c, MP_MAXLINE,
1832178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju "
1833178700Sjulian	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
1834178674Sjulian	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
1835178674Sjulian	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
1836178674Sjulian#endif
1837178674Sjulian}
1838178674Sjulian
1839178674Sjulianstatic int
1840178674Sjulianmbprof_handler(SYSCTL_HANDLER_ARGS)
1841178674Sjulian{
1842178674Sjulian	int error;
1843178674Sjulian
1844178674Sjulian	mbprof_textify();
1845178674Sjulian	error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1);
1846178674Sjulian	return (error);
1847178674Sjulian}
1848178674Sjulian
1849178674Sjulianstatic int
1850178674Sjulianmbprof_clr_handler(SYSCTL_HANDLER_ARGS)
1851178674Sjulian{
1852178674Sjulian	int clear, error;
1853266876Sglebius
1854178674Sjulian	clear = 0;
1855178674Sjulian	error = sysctl_handle_int(oidp, &clear, 0, req);
1856178674Sjulian	if (error || !req->newptr)
1857178674Sjulian		return (error);
1858266876Sglebius
1859178674Sjulian	if (clear) {
1860178674Sjulian		bzero(&mbprof, sizeof(mbprof));
1861178674Sjulian	}
1862266876Sglebius
1863178674Sjulian	return (error);
1864178674Sjulian}
1865178674Sjulian
1866178674Sjulian
1867178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD,
1868178674Sjulian	    NULL, 0, mbprof_handler, "A", "mbuf profiling statistics");
1869178674Sjulian
1870178674SjulianSYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW,
1871178674Sjulian	    NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics");
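/*
 * Usage note: with MBUF_PROFILING compiled in, the histograms can be read
 * with "sysctl kern.ipc.mbufprofile" and reset by writing a non-zero value,
 * e.g. "sysctl kern.ipc.mbufprofileclr=1".
 */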
1872178674Sjulian#endif
1873178674Sjulian
1874