/*-
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 149602 2005-08-29 20:15:33Z andre $");

#include "opt_mac.h"
#include "opt_param.h"
#include "opt_mbuf_stress_test.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>

int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
#ifdef MBUF_STRESS_TEST
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
int	m_defragrandomfailures;
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif

/*
 * Malloc-type for external ext_buf ref counts.
 */
static MALLOC_DEFINE(M_MBUF, "mbextcnt", "mbuf external ref counts");

/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain.  If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one but still return the top of the newly allocated
 * chain.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, short type)
{
	struct mbuf *mb, *top, *cur, *mtail;
	int num, rem;
	int i;

	KASSERT(len >= 0, ("m_getm(): len is < 0"));

	/* If m != NULL, we will append to the end of that chain. */
	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		mtail = NULL;

	/*
	 * Calculate how many mbufs+clusters ("packets") we need and how much
	 * leftover there is after that and allocate the first mbuf+cluster
	 * if required.
	 */
	num = len / MCLBYTES;
	rem = len % MCLBYTES;
	top = cur = NULL;
	if (num > 0) {
		if ((top = cur = m_getcl(how, type, 0)) == NULL)
			goto failed;
		top->m_len = 0;
	}
	num--;

	for (i = 0; i < num; i++) {
		mb = m_getcl(how, type, 0);
		if (mb == NULL)
			goto failed;
		mb->m_len = 0;
		cur = (cur->m_next = mb);
	}
	if (rem > 0) {
		mb = (rem > MINCLSIZE) ?
		    m_getcl(how, type, 0) : m_get(how, type);
		if (mb == NULL)
			goto failed;
		mb->m_len = 0;
		if (cur == NULL)
			top = mb;
		else
			cur->m_next = mb;
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return top;
failed:
	if (top != NULL)
		m_freem(top);
	return NULL;
}
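
/*
 * Usage sketch (hypothetical caller): grab roughly a page worth of
 * fresh mbuf storage; passing an existing chain instead of NULL would
 * append the new storage to it while still returning the newly
 * allocated part:
 *
 *	struct mbuf *m;
 *
 *	m = m_getm(NULL, 4096, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */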

/*
 * Free an entire chain of mbufs and associated external buffers, if
 * applicable.
 */
void
m_freem(struct mbuf *mb)
{

	while (mb != NULL)
		mb = m_free(mb);
}
/*-
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and set up a reference count for said buffer.  If the setting
 * up of the reference count fails, the M_EXT bit will not be set.  If
 * successful, the M_EXT bit is set in the mbuf's flags.
 *
 * Arguments:
 *    mb     The existing mbuf to which to attach the provided buffer.
 *    buf    The address of the provided external storage buffer.
 *    size   The size of the provided buffer.
 *    freef  A pointer to a routine that is responsible for freeing the
 *           provided external storage buffer.
 *    args   A pointer to an argument structure (of any type) to be passed
 *           to the provided freef routine (may be NULL).
 *    flags  Any other flags to be passed to the provided mbuf.
 *    type   The type that the external storage buffer should be
 *           labeled with.
 *
 * Returns:
 *    Nothing.
 */
void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
    void (*freef)(void *, void *), void *args, int flags, int type)
{
	u_int *ref_cnt = NULL;

	/* XXX Shouldn't be adding EXT_CLUSTER with this API */
	if (type == EXT_CLUSTER)
		ref_cnt = (u_int *)uma_find_refcnt(zone_clust,
		    mb->m_ext.ext_buf);
	else if (type == EXT_EXTREF)
		ref_cnt = __DEVOLATILE(u_int *, mb->m_ext.ref_cnt);
	mb->m_ext.ref_cnt = (ref_cnt == NULL) ?
	    malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)ref_cnt;
	if (mb->m_ext.ref_cnt != NULL) {
		*(mb->m_ext.ref_cnt) = 1;
		mb->m_flags |= (M_EXT | flags);
		mb->m_ext.ext_buf = buf;
		mb->m_data = mb->m_ext.ext_buf;
		mb->m_ext.ext_size = size;
		mb->m_ext.ext_free = freef;
		mb->m_ext.ext_args = args;
		mb->m_ext.ext_type = type;
	}
}
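
/*
 * Usage sketch (hypothetical caller; my_buf, MY_BUFSIZE, my_free and
 * my_softc are placeholders): a driver attaching a private buffer
 * checks M_EXT afterwards to see whether the reference count could be
 * set up:
 *
 *	struct mbuf *m;
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m_extadd(m, my_buf, MY_BUFSIZE, my_free, my_softc, 0, EXT_NET_DRV);
 *	if ((m->m_flags & M_EXT) == 0) {
 *		m_free(m);
 *		return (ENOBUFS);
 *	}
 */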

/*
 * Non-directly-exported function to clean up after mbufs with M_EXT
 * storage attached to them if the reference count hits 0.
 */
void
mb_free_ext(struct mbuf *m)
{
	u_int cnt;
	int dofree;

	/* Account for lazy ref count assign. */
	if (m->m_ext.ref_cnt == NULL)
		dofree = 1;
	else
		dofree = 0;

	/*
	 * This is tricky.  We need to make sure to decrement the
	 * refcount in a safe way but to also clean up if we're the
	 * last reference.  This method seems to do it without race.
	 */
	while (dofree == 0) {
		cnt = *(m->m_ext.ref_cnt);
		if (atomic_cmpset_int(m->m_ext.ref_cnt, cnt, cnt - 1)) {
			if (cnt == 1)
				dofree = 1;
			break;
		}
	}

	if (dofree) {
		/*
		 * Do the free, should be safe.
		 */
		if (m->m_ext.ext_type == EXT_PACKET) {
			uma_zfree(zone_pack, m);
			return;
		} else if (m->m_ext.ext_type == EXT_CLUSTER) {
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			m->m_ext.ext_buf = NULL;
		} else {
			(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
			    m->m_ext.ext_args);
			if (m->m_ext.ext_type != EXT_EXTREF) {
				if (m->m_ext.ref_cnt != NULL)
					free(__DEVOLATILE(u_int *,
					    m->m_ext.ref_cnt), M_MBUF);
				m->m_ext.ref_cnt = NULL;
			}
			m->m_ext.ext_buf = NULL;
		}
	}
	uma_zfree(zone_mbuf, m);
}

/*
 * Clean up mbuf (chain) from any tags and packet headers.
 * If "all" is set then the first mbuf in the chain will be
 * cleaned too.
 */
void
m_demote(struct mbuf *m0, int all)
{
	struct mbuf *m;

	for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
		if (m->m_flags & M_PKTHDR) {
			m_tag_delete_chain(m, NULL);
			m->m_flags &= ~M_PKTHDR;
			bzero(&m->m_pkthdr, sizeof(struct pkthdr));
		}
		if (m->m_type == MT_HEADER)
			m->m_type = MT_DATA;
		if (m != m0 && m->m_nextpkt)
			m->m_nextpkt = NULL;
		m->m_flags = m->m_flags & (M_EXT|M_EOR|M_RDONLY|M_FREELIST);
	}
}

/*
 * Sanity checks on mbuf (chain).
 * Returns 0 bad, 1 good, panic worse.
 * If sanitize is 0, problems trigger M_SANITY_ACTION; if 1, the
 * offending fields are fixed up or garbled so they blow up later.
 */
int
m_sanity(struct mbuf *m0, int sanitize)
{
	struct mbuf *m;
	caddr_t a, b;
	int pktlen = 0;

#define	M_SANITY_ACTION(s)	return (0)
/* #define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m) */

	m = m0;
	while (m) {
		/*
		 * Basic pointer checks.  If any of these fails then some
		 * unrelated kernel memory before or after us is trashed.
		 * No way to recover from that.
		 */
		a = (m->m_flags & M_EXT ? m->m_ext.ext_buf :
			(m->m_flags & M_PKTHDR ? (caddr_t)(&m->m_pktdat) :
			 (caddr_t)(&m->m_dat)) );
		b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size :
			(m->m_flags & M_PKTHDR ? MHLEN : MLEN)));
		if ((caddr_t)m->m_data < a)
			M_SANITY_ACTION("m_data outside mbuf data range left");
		if ((caddr_t)m->m_data > b)
			M_SANITY_ACTION("m_data outside mbuf data range right");
		if ((caddr_t)m->m_data + m->m_len > b)
			M_SANITY_ACTION("m_data + m_len exceeds mbuf space");
		if (m->m_flags & M_PKTHDR && m->m_pkthdr.header) {
			if ((caddr_t)m->m_pkthdr.header < a ||
			    (caddr_t)m->m_pkthdr.header > b)
				M_SANITY_ACTION("m_pkthdr.header outside mbuf data range");
		}

		/* m->m_nextpkt may only be set on first mbuf in chain. */
		if (m != m0 && m->m_nextpkt) {
			if (sanitize) {
				m_freem(m->m_nextpkt);
				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
			} else
				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
		}

		/* correct type correlations. */
		if (m->m_type == MT_HEADER && !(m->m_flags & M_PKTHDR)) {
			if (sanitize)
				m->m_type = MT_DATA;
			else
				M_SANITY_ACTION("MT_HEADER set but not M_PKTHDR");
		}

		/* packet length (not mbuf length!) calculation */
		if (m0->m_flags & M_PKTHDR)
			pktlen += m->m_len;

		/* m_tags may only be attached to first mbuf in chain. */
		if (m != m0 && m->m_flags & M_PKTHDR &&
		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
			if (sanitize) {
				m_tag_delete_chain(m, NULL);
				/* put in 0xDEADC0DE perhaps? */
			} else
				M_SANITY_ACTION("m_tags on in-chain mbuf");
		}

		/* M_PKTHDR may only be set on first mbuf in chain */
		if (m != m0 && m->m_flags & M_PKTHDR) {
			if (sanitize) {
				bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
				m->m_flags &= ~M_PKTHDR;
				/* put in 0xDEADC0DE and leave hdr flag in */
			} else
				M_SANITY_ACTION("M_PKTHDR on in-chain mbuf");
		}

		m = m->m_next;
	}
	if (pktlen && pktlen != m0->m_pkthdr.len) {
		if (sanitize)
			m0->m_pkthdr.len = 0;
		else
			M_SANITY_ACTION("m_pkthdr.len != mbuf chain length");
	}
#undef	M_SANITY_ACTION

	return 1;
}


/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{

#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with MGETHDR. Many users
	 * (e.g. m_copy*, m_prepend) use MGET and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip.  For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
	MBUF_CHECKSLEEP(how);
#ifdef MAC
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (m->m_flags & M_PKTHDR)
		MGETHDR(mn, how, m->m_type);
	else
		MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
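
/*
 * Usage sketch (hypothetical caller): most code reaches this function
 * through the M_PREPEND() macro, which uses leading space in the first
 * mbuf when available and falls back to m_prepend() otherwise:
 *
 *	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */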

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	MBUF_CHECKSLEEP(wait);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			MGETHDR(n, wait, m->m_type);
		else
			MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
			n->m_ext.ref_cnt = m->m_ext.ref_cnt;
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		mbstat.m_mcfail++;	/* XXX: No consistency. */

	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
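
/*
 * Usage sketch (hypothetical caller): take a reference-counted copy of
 * the first 100 bytes of a packet; the result shares any clusters with
 * the original and must be treated as read-only (see M_WRITABLE()):
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, 100, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 */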

/*
 * Returns mbuf chain with new head for the prepending case.
 * Copies from mbuf (chain) n from off for len to mbuf (chain) m
 * either prepending or appending the data.
 * The resulting mbuf (chain) m is fully writeable.
 * m is destination (is made writeable)
 * n is source, off is offset in source, len is len from offset
 * prep, 0 append, 1 prepend
 * how, wait or nowait
 */

static int
m_bcopyxxx(void *s, void *t, u_int len)
{
	bcopy(s, t, (size_t)len);
	return 0;
}

struct mbuf *
m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
    int prep, int how)
{
	struct mbuf *mm, *x, *z;
	caddr_t p;
	int i, mlen = 0, nlen = 0;
	char buf[MLEN];

	KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source"));
	KASSERT(off >= 0, ("m_copymdata, negative off %d", off));
	KASSERT(len >= 0, ("m_copymdata, negative len %d", len));
	KASSERT(prep == 0 || prep == 1, ("m_copymdata, unknown direction %d", prep));

	/* Make sure environment is sane. */
	z = m;
	for (;;) {
		mlen += z->m_len;
		if (!M_WRITABLE(z)) {
			/* Make clusters writeable. */
			if (z->m_flags & M_RDONLY)
				return NULL;	/* Can't handle ext ref. */
			x = m_getcl(how, MT_DATA, 0);
			if (!x)
				return NULL;
			bcopy(z->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size);
			p = x->m_ext.ext_buf + (z->m_data - z->m_ext.ext_buf);
			MEXT_REM_REF(z);	/* XXX */
			z->m_data = p;
			z->m_ext = x->m_ext;	/* z takes over the new cluster */
			x->m_flags &= ~M_EXT;
			(void)m_free(x);
		}
		/* Stop at the tail so z points at the last mbuf. */
		if (z->m_next == NULL)
			break;
		z = z->m_next;
	}
	mm = prep ? m : z;
	for (z = n; z != NULL; z = z->m_next)
		nlen += z->m_len;
	if (len == M_COPYALL)
		len = nlen - off;
	if (off + len > nlen || len < 1)
		return NULL;

	/*
	 * Append/prepend the data.  Allocating mbufs as necessary.
	 */
	/* Shortcut if enough free space in first/last mbuf. */
	if (!prep && M_TRAILINGSPACE(mm) >= len) {
		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t) +
			 mm->m_len);
		mm->m_len += len;
		mm->m_pkthdr.len += len;
		return m;
	}
	if (prep && M_LEADINGSPACE(mm) >= len) {
		mm->m_data = mtod(mm, caddr_t) - len;
		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t));
		mm->m_len += len;
		mm->m_pkthdr.len += len;
		return mm;
	}

	/* Expand first/last mbuf to cluster if possible. */
	if (!prep && !(mm->m_flags & M_EXT) && len > M_TRAILINGSPACE(mm)) {
		bcopy(mm->m_data, &buf, mm->m_len);
		m_clget(mm, how);
		if (!(mm->m_flags & M_EXT))
			return NULL;
		bcopy(&buf, mm->m_ext.ext_buf, mm->m_len);
		mm->m_data = mm->m_ext.ext_buf;
		mm->m_pkthdr.header = NULL;
	}
	if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) {
		bcopy(mm->m_data, &buf, mm->m_len);
		m_clget(mm, how);
		if (!(mm->m_flags & M_EXT))
			return NULL;
		bcopy(&buf, (caddr_t)mm->m_ext.ext_buf +
		       mm->m_ext.ext_size - mm->m_len, mm->m_len);
		mm->m_data = (caddr_t)mm->m_ext.ext_buf +
			      mm->m_ext.ext_size - mm->m_len;
		mm->m_pkthdr.header = NULL;
	}

	/* Append/prepend as many mbuf (clusters) as necessary to fit len. */
	if (!prep && len > M_TRAILINGSPACE(mm)) {
		if (!m_getm(mm, len - M_TRAILINGSPACE(mm), how, MT_DATA))
			return NULL;
	}
	if (prep && len > M_LEADINGSPACE(mm)) {
		if (!(z = m_getm(NULL, len - M_LEADINGSPACE(mm), how, MT_DATA)))
			return NULL;
		i = 0;
		for (x = z; x != NULL; x = x->m_next) {
			i += x->m_flags & M_EXT ? x->m_ext.ext_size :
			      (x->m_flags & M_PKTHDR ? MHLEN : MLEN);
			if (!x->m_next)
				break;
		}
		if (mm->m_flags & M_PKTHDR)
			m_move_pkthdr(z, mm);	/* pkthdr stays on chain head */
		z->m_data += i - len;
		x->m_next = mm;
		mm = z;
	}

	/* Seek to start position in source mbuf. Optimization for long chains. */
	while (off > 0) {
		if (off < n->m_len)
			break;
		off -= n->m_len;
		n = n->m_next;
	}

	/* Copy data into target mbuf. */
	z = mm;
	while (len > 0) {
		KASSERT(z != NULL, ("m_copymdata, falling off target edge"));
		i = min(len, M_TRAILINGSPACE(z));	/* don't copy past len */
		m_apply(n, off, i, m_bcopyxxx, mtod(z, caddr_t) + z->m_len);
		z->m_len += i;
		/* fixup pkthdr.len if necessary */
		if ((prep ? mm : m)->m_flags & M_PKTHDR)
			(prep ? mm : m)->m_pkthdr.len += i;
		off += i;
		len -= i;
		z = z->m_next;
	}
	return (prep ? mm : m);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MBUF_CHECKSLEEP(how);
	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
		n->m_ext.ref_cnt = m->m_ext.ref_cnt;
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
			n->m_ext.ref_cnt = m->m_ext.ref_cnt;
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
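
/*
 * Usage sketch (hypothetical caller; "iphlen" is a placeholder): peek
 * at a TCP header buried in a chain without modifying the chain,
 * provided the chain is known to be long enough:
 *
 *	struct tcphdr th;
 *
 *	m_copydata(m, iphlen, sizeof(th), (caddr_t)&th);
 */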

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	MBUF_CHECKSLEEP(how);
	/* Sanity check */
	if (m == NULL)
		return (NULL);
	M_ASSERTPKTHDR(m);

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		if (remain >= MINCLSIZE) {
			n = m_getcl(how, m->m_type, 0);
			nsize = MCLBYTES;
		} else {
			n = m_get(how, m->m_type);
			nsize = MLEN;
		}
		if (n == NULL)
			goto nospace;

		if (top == NULL) {		/* First one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how)) {
				m_free(n);
				goto nospace;
			}
			nsize = MHLEN;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
			("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
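
/*
 * Usage sketch (hypothetical caller): in contrast to m_copypacket(),
 * the chain returned here owns all of its storage, so it is safe to
 * modify:
 *
 *	struct mbuf *w;
 *
 *	w = m_dup(m, M_DONTWAIT);
 *	if (w == NULL)
 *		return (ENOBUFS);
 */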

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

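/*
 * Trim req_len bytes from the head (req_len > 0) or the tail
 * (req_len < 0) of an mbuf chain, adjusting m_pkthdr.len when the
 * first mbuf carries a packet header.
 */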
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}
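
/*
 * Usage sketch (hypothetical caller): strip a 14-byte Ethernet header
 * from the front of a packet and a 4-byte trailer from the end:
 *
 *	m_adj(m, ETHER_HDR_LEN);
 *	m_adj(m, -ETHER_CRC_LEN);
 */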

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}
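
/*
 * Usage sketch (hypothetical caller): the classic use is in protocol
 * input paths, making a header contiguous before casting m_data:
 *
 *	struct ip *ip;
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */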

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(m, n);
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
 bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		n->m_ext.ref_cnt = m->m_ext.ref_cnt;
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
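
/*
 * Usage sketch (hypothetical caller): chop a packet in two at byte
 * 1024, e.g. when fragmenting; on failure the original chain is still
 * intact:
 *
 *	struct mbuf *tail;
 *
 *	tail = m_split(m, 1024, M_DONTWAIT);
 *	if (tail == NULL)
 *		return (ENOBUFS);
 */
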
/*
 * Routine to copy from device local memory into mbufs.
 * Note that `off' argument is offset into first mbuf of target chain from
 * which to begin copying the data to.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
	 void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	while (totlen > 0) {
		if (top == NULL) {	/* First one, must be PKTHDR */
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				len = MCLBYTES;
			} else {
				m = m_gethdr(M_DONTWAIT, MT_DATA);
				len = MHLEN;

				/* Place initial small packet/header at end of mbuf */
				if (m && totlen + off + max_linkhdr <= MLEN) {
					m->m_data += max_linkhdr;
					len -= max_linkhdr;
				}
			}
			if (m == NULL)
				return NULL;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = totlen;
		} else {
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, 0);
				len = MCLBYTES;
			} else {
				m = m_get(M_DONTWAIT, MT_DATA);
				len = MLEN;
			}
			if (m == NULL) {
				m_freem(top);
				return NULL;
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			bzero(mtod(n, caddr_t), MLEN);
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

/*
 * Append the specified data to the indicated mbuf chain,
 * extending the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, u_int), void *arg)
{
	u_int count;
	int rval;

	KASSERT(off >= 0, ("m_apply, negative off %d", off));
	KASSERT(len >= 0, ("m_apply, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_apply, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);
		len -= count;
		off = 0;
		m = m->m_next;
	}
	return (0);
}
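
/*
 * Usage sketch (hypothetical caller; my_cksum_cb and sum are
 * placeholders): walk a region of a chain without linearizing it,
 * e.g. to accumulate a checksum; the callback returns 0 to continue
 * and any nonzero value to abort the walk, which m_apply() then
 * returns:
 *
 *	error = m_apply(m, off, len, my_cksum_cb, &sum);
 */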

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search. */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;
			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data. */
					*off = m->m_len;
					return (m);
				}
				return (NULL);
			}
			m = m->m_next;
		}
	}
	return (NULL);
}

void
m_print(const struct mbuf *m, int maxlen)
{
	int len;
	int pdata;
	const struct mbuf *m2;

	if (m->m_flags & M_PKTHDR)
		len = m->m_pkthdr.len;
	else
		len = -1;
	m2 = m;
	while (m2 != NULL && (len == -1 || len)) {
		pdata = m2->m_len;
		if (maxlen != -1 && pdata > maxlen)
			pdata = maxlen;
		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
		    m2->m_next, m2->m_flags, "\20\20freelist\17skipfw"
		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
		if (pdata)
			printf(", %*D\n", pdata, (u_char *)m2->m_data, "-");
		if (len != -1)
			len -= m2->m_len;
		m2 = m2->m_next;
	}
	if (len > 0)
		printf("%d bytes unaccounted for.\n", len);
	return;
}

u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}

u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a non-packet header is passed in, the original
 * mbuf (chain) will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	MBUF_CHECKSLEEP(how);
	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
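
/*
 * Usage sketch (hypothetical caller): a driver whose hardware handles
 * only a few DMA segments can try compacting a long chain before
 * dropping the packet; on success the original chain has been freed,
 * on failure it is untouched:
 *
 *	struct mbuf *d;
 *
 *	d = m_defrag(m, M_DONTWAIT);
 *	if (d == NULL) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 *	m = d;
 */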

#ifdef MBUF_STRESS_TEST

/*
 * Fragment an mbuf chain.  There's no reason you'd ever want to do
 * this in normal usage, but it's great for stress testing various
 * mbuf consumers.
 *
 * If fragmentation is not possible, the original chain will be
 * returned.
 *
 * Possible length values:
 * 0	 no fragmentation will occur
 * > 0	each fragment will be of the specified length
 * -1	each fragment will be the same random value in length
 * -2	each fragment's length will be entirely random
 * (Random values range from 1 to 256)
 */
struct mbuf *
m_fragment(struct mbuf *m0, int how, int length)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	if ((length == 0) || (length < -2))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

	m_final = m_getcl(how, MT_DATA, M_PKTHDR);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	if (length == -1)
		length = 1 + (arc4random() & 255);

	while (progress < m0->m_pkthdr.len) {
		int fraglen;

		if (length > 0)
			fraglen = length;
		else
			fraglen = 1 + (arc4random() & 255);
		if (fraglen > m0->m_pkthdr.len - progress)
			fraglen = m0->m_pkthdr.len - progress;

		if (fraglen > MCLBYTES)
			fraglen = MCLBYTES;

		if (m_new == NULL) {
			m_new = m_getcl(how, MT_DATA, 0);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
		progress += fraglen;
		m_new->m_len = fraglen;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_freem(m0);
	m0 = m_final;
	return (m0);
nospace:
	if (m_final)
		m_freem(m_final);
	/* Return the original chain on failure */
	return (m0);
}

#endif

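/*
 * Copy the contents of uio into a properly sized mbuf chain.
 * At most len bytes are copied when len is positive (otherwise all of
 * uio_resid), and align bytes of empty leading space are left in the
 * first mbuf; align must be smaller than MHLEN.
 */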
struct mbuf *
m_uiotombuf(struct uio *uio, int how, int len, int align)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, error = 0, length, total;

	if (len > 0)
		total = min(uio->uio_resid, len);
	else
		total = uio->uio_resid;
	if (align >= MHLEN)
		goto nospace;
	if (total + align > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);
	if (m_final == NULL)
		goto nospace;
	m_final->m_data += align;
	m_new = m_final;
	while (progress < total) {
		length = total - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;
		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}
		error = uiomove(mtod(m_new, void *), length, uio);
		if (error)
			goto nospace;
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_fixhdr(m_final);
	return (m_final);
nospace:
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
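
/*
 * Usage sketch (hypothetical caller): gather up to 1500 bytes of user
 * data from a uio, leaving room to prepend an Ethernet header later:
 *
 *	struct mbuf *m;
 *
 *	m = m_uiotombuf(uio, M_DONTWAIT, 1500, ETHER_HDR_LEN);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */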

/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;

	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}