uipc_mbuf.c revision 152101
/*-
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 152101 2005-11-05 19:43:55Z andre $");

#include "opt_mac.h"
#include "opt_param.h"
#include "opt_mbuf_stress_test.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>

int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
#ifdef MBUF_STRESS_TEST
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
int	m_defragrandomfailures;
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif

/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain.  If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one but still return the top of the newly allocated
 * chain.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, short type)
{
	struct mbuf *mb, *top, *cur, *mtail;
	int num, rem;
	int i;

	KASSERT(len >= 0, ("m_getm(): len is < 0"));

	/* If m != NULL, we will append to the end of that chain. */
	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		mtail = NULL;

	/*
	 * Calculate how many mbufs+clusters ("packets") we need and how much
	 * leftover there is after that and allocate the first mbuf+cluster
	 * if required.
	 */
	num = len / MCLBYTES;
	rem = len % MCLBYTES;
	top = cur = NULL;
	if (num > 0) {
		if ((top = cur = m_getcl(how, type, 0)) == NULL)
			goto failed;
		top->m_len = 0;
	}
	num--;

	for (i = 0; i < num; i++) {
		mb = m_getcl(how, type, 0);
		if (mb == NULL)
			goto failed;
		mb->m_len = 0;
		cur = (cur->m_next = mb);
	}
	if (rem > 0) {
		mb = (rem > MINCLSIZE) ?
		    m_getcl(how, type, 0) : m_get(how, type);
		if (mb == NULL)
			goto failed;
		mb->m_len = 0;
		if (cur == NULL)
			top = mb;
		else
			cur->m_next = mb;
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return top;
failed:
	if (top != NULL)
		m_freem(top);
	return NULL;
}
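
/*
 * Example (illustrative sketch, not compiled): building a fresh chain
 * for a hypothetical 4k payload, then growing it by another 2k.  On
 * failure m_getm() leaves any existing chain untouched.
 */
#if 0
	struct mbuf *m;

	m = m_getm(NULL, 4096, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	if (m_getm(m, 2048, M_DONTWAIT, MT_DATA) == NULL) {
		m_freem(m);		/* original chain was not modified */
		return (ENOBUFS);
	}
#endif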

/*
 * Free an entire chain of mbufs and associated external buffers, if
 * applicable.
 */
void
m_freem(struct mbuf *mb)
{

	while (mb != NULL)
		mb = m_free(mb);
}

/*-
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and set up a reference count for said buffer.  If the setting
 * up of the reference count fails, the M_EXT bit will not be set.  If
 * successful, the M_EXT bit is set in the mbuf's flags.
 *
 * Arguments:
 *    mb     The existing mbuf to which to attach the provided buffer.
 *    buf    The address of the provided external storage buffer.
 *    size   The size of the provided buffer.
 *    freef  A pointer to a routine that is responsible for freeing the
 *           provided external storage buffer.
 *    args   A pointer to an argument structure (of any type) to be passed
 *           to the provided freef routine (may be NULL).
 *    flags  Any other flags to be passed to the provided mbuf.
 *    type   The type that the external storage buffer should be
 *           labeled with.
 *
 * Returns:
 *    Nothing.
 */
void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
    void (*freef)(void *, void *), void *args, int flags, int type)
{
	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));

	if (type != EXT_EXTREF)
		mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
	if (mb->m_ext.ref_cnt != NULL) {
		*(mb->m_ext.ref_cnt) = 1;
		mb->m_flags |= (M_EXT | flags);
		mb->m_ext.ext_buf = buf;
		mb->m_data = mb->m_ext.ext_buf;
		mb->m_ext.ext_size = size;
		mb->m_ext.ext_free = freef;
		mb->m_ext.ext_args = args;
		mb->m_ext.ext_type = type;
	}
}
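
/*
 * Example (sketch, assuming a hypothetical driver buffer "drvbuf" of
 * DRVBUF_SIZE bytes and a pool cookie "pool"): attaching driver-owned
 * storage to a header mbuf.  The free routine is invoked when the last
 * reference to the buffer is dropped.
 */
#if 0
static void
drv_ext_free(void *buf, void *args)
{
	/* Return "buf" to the driver's buffer pool ("args"). */
}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m != NULL) {
		m_extadd(m, (caddr_t)drvbuf, DRVBUF_SIZE, drv_ext_free,
		    pool, 0, EXT_NET_DRV);
		if ((m->m_flags & M_EXT) == 0) {	/* ref_cnt alloc failed */
			m_freem(m);
			m = NULL;
		}
	}
#endif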

/*
 * Non-directly-exported function to clean up after mbufs with M_EXT
 * storage attached to them if the reference count hits 1.
 */
void
mb_free_ext(struct mbuf *m)
{
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));

	/*
	 * Free attached storage if this mbuf is the only reference to it.
	 * fetchadd returns the previous value; 1 means we held the last
	 * reference.
	 */
	if (*(m->m_ext.ref_cnt) == 1 ||
	    atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
		switch (m->m_ext.ext_type) {
		case EXT_PACKET:	/* The packet zone is special. */
			if (*(m->m_ext.ref_cnt) == 0)
				*(m->m_ext.ref_cnt) = 1;
			uma_zfree(zone_pack, m);
			return;		/* Job done. */
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			break;
		case EXT_SFBUF:
		case EXT_NET_DRV:
		case EXT_MOD_TYPE:
		case EXT_DISPOSABLE:
			*(m->m_ext.ref_cnt) = 0;
			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
				m->m_ext.ref_cnt));
			/* FALLTHROUGH */
		case EXT_EXTREF:
			KASSERT(m->m_ext.ext_free != NULL,
				("%s: ext_free not set", __func__));
			(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
			    m->m_ext.ext_args);
			break;
		default:
			KASSERT(m->m_ext.ext_type == 0,
				("%s: unknown ext_type", __func__));
		}
	}
	/*
	 * Free this mbuf back to the mbuf zone with all m_ext
	 * information purged.
	 */
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_free = NULL;
	m->m_ext.ext_args = NULL;
	m->m_ext.ref_cnt = NULL;
	m->m_ext.ext_size = 0;
	m->m_ext.ext_type = 0;
	m->m_flags &= ~M_EXT;
	uma_zfree(zone_mbuf, m);
}

/*
 * Attach the cluster from *m to *n, set up m_ext in *n
 * and bump the refcount of the cluster.
 */
static void
mb_dupcl(struct mbuf *n, struct mbuf *m)
{
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
	KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));

	if (*(m->m_ext.ref_cnt) == 1)
		*(m->m_ext.ref_cnt) += 1;
	else
		atomic_add_int(m->m_ext.ref_cnt, 1);
	n->m_ext.ext_buf = m->m_ext.ext_buf;
	n->m_ext.ext_free = m->m_ext.ext_free;
	n->m_ext.ext_args = m->m_ext.ext_args;
	n->m_ext.ext_size = m->m_ext.ext_size;
	n->m_ext.ref_cnt = m->m_ext.ref_cnt;
	n->m_ext.ext_type = m->m_ext.ext_type;
	n->m_flags |= M_EXT;
}

/*
 * Clean up mbuf (chain) from any tags and packet headers.
 * If "all" is set then the first mbuf in the chain will be
 * cleaned too.
 */
void
m_demote(struct mbuf *m0, int all)
{
	struct mbuf *m;

	for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
		if (m->m_flags & M_PKTHDR) {
			m_tag_delete_chain(m, NULL);
			m->m_flags &= ~M_PKTHDR;
			bzero(&m->m_pkthdr, sizeof(struct pkthdr));
		}
		if (m->m_type == MT_HEADER)
			m->m_type = MT_DATA;
		if (m != m0 && m->m_nextpkt != NULL)
			m->m_nextpkt = NULL;
		m->m_flags = m->m_flags & (M_EXT|M_EOR|M_RDONLY|M_FREELIST);
	}
}

/*
 * Sanity checks on mbuf (chain) for use in KASSERT() and general
 * debugging.
 * Returns 0 (or panics, depending on M_SANITY_ACTION) when the chain
 * is bad and 1 when all tests pass.
 * With sanitize == 0 a failed check only triggers M_SANITY_ACTION;
 * with sanitize == 1 the chain is repaired and stale pointers are
 * poisoned so that any later misuse blows up.
 */
int
m_sanity(struct mbuf *m0, int sanitize)
{
	struct mbuf *m;
	caddr_t a, b;
	int pktlen = 0;

#define	M_SANITY_ACTION(s)	return (0)
/* #define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m) */

	for (m = m0; m != NULL; m = m->m_next) {
		/*
		 * Basic pointer checks.  If any of these fails then some
		 * unrelated kernel memory before or after us is trashed.
		 * No way to recover from that.
		 */
		a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf :
			((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) :
			 (caddr_t)(&m->m_dat)));
		b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size :
			((m->m_flags & M_PKTHDR) ? MHLEN : MLEN)));
		if ((caddr_t)m->m_data < a)
			M_SANITY_ACTION("m_data outside mbuf data range left");
		if ((caddr_t)m->m_data > b)
			M_SANITY_ACTION("m_data outside mbuf data range right");
		if ((caddr_t)m->m_data + m->m_len > b)
			M_SANITY_ACTION("m_data + m_len exceeds mbuf space");
		if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.header) {
			if ((caddr_t)m->m_pkthdr.header < a ||
			    (caddr_t)m->m_pkthdr.header > b)
				M_SANITY_ACTION("m_pkthdr.header outside mbuf data range");
		}

		/* m->m_nextpkt may only be set on first mbuf in chain. */
		if (m != m0 && m->m_nextpkt != NULL) {
			if (sanitize) {
				m_freem(m->m_nextpkt);
				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
			} else
				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
		}

		/* correct type correlations. */
		if (m->m_type == MT_HEADER && !(m->m_flags & M_PKTHDR)) {
			if (sanitize)
				m->m_type = MT_DATA;
			else
				M_SANITY_ACTION("MT_HEADER set but not M_PKTHDR");
		}

		/* packet length (not mbuf length!) calculation */
		if (m0->m_flags & M_PKTHDR)
			pktlen += m->m_len;

		/* m_tags may only be attached to first mbuf in chain. */
		if (m != m0 && m->m_flags & M_PKTHDR &&
		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
			if (sanitize) {
				m_tag_delete_chain(m, NULL);
				/* put in 0xDEADC0DE perhaps? */
			} else
				M_SANITY_ACTION("m_tags on in-chain mbuf");
		}

		/* M_PKTHDR may only be set on first mbuf in chain */
		if (m != m0 && m->m_flags & M_PKTHDR) {
			if (sanitize) {
				bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
				m->m_flags &= ~M_PKTHDR;
				/* put in 0xDEADC0DE and leave hdr flag in */
			} else
				M_SANITY_ACTION("M_PKTHDR on in-chain mbuf");
		}
	}
	m = m0;
	if (pktlen && pktlen != m->m_pkthdr.len) {
		if (sanitize)
			m->m_pkthdr.len = 0;
		else
			M_SANITY_ACTION("m_pkthdr.len != mbuf chain length");
	}
	return 1;

#undef	M_SANITY_ACTION
}
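
/*
 * Example (sketch): m_sanity() is meant to be cheap enough to wrap in
 * an assertion at the entry of an mbuf-consuming function.
 */
#if 0
	KASSERT(m_sanity(m0, 0), ("%s: input mbuf chain fails m_sanity",
	    __func__));
#endif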

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{

#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with MGETHDR. Many users
	 * (e.g. m_copy*, m_prepend) use MGET and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip.  For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
	MBUF_CHECKSLEEP(how);
#ifdef MAC
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (m->m_flags & M_PKTHDR)
		MGETHDR(mn, how, m->m_type);
	else
		MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
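
/*
 * Example (sketch): callers normally use the M_PREPEND() macro, which
 * only falls back to m_prepend() when the chain lacks leading space;
 * either way the old chain is freed on failure.
 */
#if 0
	M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
	if (m == NULL)
		return (ENOBUFS);	/* old chain already freed */
#endif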

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	MBUF_CHECKSLEEP(wait);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			MGETHDR(n, wait, m->m_type);
		else
			MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			mb_dupcl(n, m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		mbstat.m_mcfail++;	/* XXX: No consistency. */

	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
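
/*
 * Example (sketch): taking a reference copy of a whole packet, e.g.
 * to keep the original queued for retransmission.  The copy shares
 * clusters with the original, so test M_WRITABLE() before writing.
 */
#if 0
	struct mbuf *copy;

	copy = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
	if (copy == NULL)
		return (ENOBUFS);
#endif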

/*
 * Copy len bytes at offset off from mbuf (chain) n to mbuf (chain) m,
 * either appending or prepending the data; the chain (with a new head
 * in the prepending case) is returned.  The resulting mbuf (chain) m
 * is fully writeable.
 * m is the destination (made writeable),
 * n is the source, off is the offset into the source, len is the
 * length from that offset,
 * prep is 0 to append or 1 to prepend,
 * how is M_TRYWAIT or M_DONTWAIT.
 */

static int
m_bcopyxxx(void *s, void *t, u_int len)
{
	bcopy(s, t, (size_t)len);
	return 0;
}

struct mbuf *
m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
    int prep, int how)
{
	struct mbuf *mm, *x, *z, *prev = NULL;
	caddr_t p;
	int i, nlen = 0;
	char buf[MLEN];

	KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source"));
	KASSERT(off >= 0, ("m_copymdata, negative off %d", off));
	KASSERT(len >= 0, ("m_copymdata, negative len %d", len));
	KASSERT(prep == 0 || prep == 1, ("m_copymdata, unknown direction %d", prep));

	mm = m;
	if (!prep) {
		while (mm->m_next) {
			prev = mm;
			mm = mm->m_next;
		}
	}
	for (z = n; z != NULL; z = z->m_next)
		nlen += z->m_len;
	if (len == M_COPYALL)
		len = nlen - off;
	if (off + len > nlen || len < 1)
		return NULL;

	if (!M_WRITABLE(mm)) {
		/* XXX: Use proper m_xxx function instead. */
		x = m_getcl(how, MT_DATA, mm->m_flags);
		if (x == NULL)
			return NULL;
		bcopy(mm->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size);
		p = x->m_ext.ext_buf + (mm->m_data - mm->m_ext.ext_buf);
		x->m_data = p;
		mm->m_next = NULL;
		if (mm != m)
			prev->m_next = x;
		m_free(mm);
		mm = x;
	}

	/*
	 * Append/prepend the data, allocating mbufs as necessary.
	 */
	/* Shortcut if enough free space in first/last mbuf. */
	if (!prep && M_TRAILINGSPACE(mm) >= len) {
		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t) +
			 mm->m_len);
		mm->m_len += len;
		mm->m_pkthdr.len += len;
		return m;
	}
	if (prep && M_LEADINGSPACE(mm) >= len) {
		mm->m_data = mtod(mm, caddr_t) - len;
		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t));
		mm->m_len += len;
		mm->m_pkthdr.len += len;
		return mm;
	}

	/* Expand first/last mbuf to cluster if possible. */
	if (!prep && !(mm->m_flags & M_EXT) && len > M_TRAILINGSPACE(mm)) {
		bcopy(mm->m_data, buf, mm->m_len);
		m_clget(mm, how);
		if (!(mm->m_flags & M_EXT))
			return NULL;
		bcopy(buf, mm->m_ext.ext_buf, mm->m_len);
		mm->m_data = mm->m_ext.ext_buf;
		mm->m_pkthdr.header = NULL;
	}
	if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) {
		bcopy(mm->m_data, buf, mm->m_len);
		m_clget(mm, how);
		if (!(mm->m_flags & M_EXT))
			return NULL;
		bcopy(buf, (caddr_t)mm->m_ext.ext_buf +
		       mm->m_ext.ext_size - mm->m_len, mm->m_len);
		mm->m_data = (caddr_t)mm->m_ext.ext_buf +
			      mm->m_ext.ext_size - mm->m_len;
		mm->m_pkthdr.header = NULL;
	}

	/* Append/prepend as many mbuf (clusters) as necessary to fit len. */
	if (!prep && len > M_TRAILINGSPACE(mm)) {
		if (!m_getm(mm, len - M_TRAILINGSPACE(mm), how, MT_DATA))
			return NULL;
	}
	if (prep && len > M_LEADINGSPACE(mm)) {
		if (!(z = m_getm(NULL, len - M_LEADINGSPACE(mm), how, MT_DATA)))
			return NULL;
		i = 0;
		for (x = z; x != NULL; x = x->m_next) {
			i += x->m_flags & M_EXT ? x->m_ext.ext_size :
			      (x->m_flags & M_PKTHDR ? MHLEN : MLEN);
			if (!x->m_next)
				break;
		}
		z->m_data += i - len;
		m_move_pkthdr(z, mm);	/* the new head carries the pkthdr */
		x->m_next = mm;
		mm = z;
	}

	/* Seek to start position in source mbuf. Optimization for long chains. */
	while (off > 0) {
		if (off < n->m_len)
			break;
		off -= n->m_len;
		n = n->m_next;
	}

	/* Copy data into target mbuf. */
	z = mm;
	while (len > 0) {
		KASSERT(z != NULL, ("m_copymdata, falling off target edge"));
		i = min(len, M_TRAILINGSPACE(z));
		m_apply(n, off, i, m_bcopyxxx, mtod(z, caddr_t) + z->m_len);
		z->m_len += i;
		/* fixup pkthdr.len if necessary */
		if ((prep ? mm : m)->m_flags & M_PKTHDR)
			(prep ? mm : m)->m_pkthdr.len += i;
		off += i;
		len -= i;
		z = z->m_next;
	}
	return (prep ? mm : m);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MBUF_CHECKSLEEP(how);
	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		mb_dupcl(n, m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			mb_dupcl(n, m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
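
/*
 * Example (sketch): linearizing a header that may be split across
 * mbufs into a stack buffer; assumes the chain is known to hold at
 * least sizeof(th) bytes at offset "off".
 */
#if 0
	struct tcphdr th;

	m_copydata(m, off, sizeof(th), (caddr_t)&th);
#endif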

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	MBUF_CHECKSLEEP(how);
	/* Sanity check */
	if (m == NULL)
		return (NULL);
	M_ASSERTPKTHDR(m);

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		if (remain >= MINCLSIZE) {
			n = m_getcl(how, m->m_type, 0);
			nsize = MCLBYTES;
		} else {
			n = m_get(how, m->m_type);
			nsize = MLEN;
		}
		if (n == NULL)
			goto nospace;

		if (top == NULL) {		/* First one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how)) {
				m_free(n);
				goto nospace;
			}
			nsize = MHLEN;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
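
/*
 * Example (sketch): making a private, fully writable copy before
 * modifying packet contents in place.
 */
#if 0
	struct mbuf *w;

	w = m_dup(m, M_DONTWAIT);
	if (w == NULL)
		return (ENOBUFS);
	m_freem(m);		/* original no longer needed */
	m = w;			/* every mbuf of "m" is now writable */
#endif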

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

/*
 * Trim "req_len" bytes from the mbuf (chain): from the head if req_len
 * is positive, from the tail if it is negative.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}
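
/*
 * Example (sketch): stripping the Ethernet header from the front and
 * the 4-byte FCS from the tail of a received frame.
 */
#if 0
	m_adj(m, ETHER_HDR_LEN);	/* positive: trim from the head */
	m_adj(m, -ETHER_CRC_LEN);	/* negative: trim from the tail */
#endif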

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns NULL on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}
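
/*
 * Example (sketch): the classic use, making a protocol header
 * contiguous before dereferencing it via mtod().  On failure the
 * chain has already been freed.
 */
#if 0
	if (m->m_len < sizeof(struct ip) &&
	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
		return;			/* chain freed by m_pullup() */
	ip = mtod(m, struct ip *);
#endif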

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(m, n);
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		mb_dupcl(n, m);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
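
/*
 * Example (sketch): splitting a packet at a hypothetical boundary
 * "mtu"; "m" keeps the first mtu bytes, "tail" gets the rest.
 */
#if 0
	struct mbuf *tail;

	tail = m_split(m, mtu, M_DONTWAIT);
	if (tail == NULL)
		return (ENOBUFS);	/* "m" is left intact */
#endif
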
/*
 * Routine to copy from device local memory into mbufs.
 * Note that `off' argument is offset into first mbuf of target chain from
 * which to begin copying the data to.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
	 void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	while (totlen > 0) {
		if (top == NULL) {	/* First one, must be PKTHDR */
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				len = MCLBYTES;
			} else {
				m = m_gethdr(M_DONTWAIT, MT_DATA);
				len = MHLEN;

				/* Place initial small packet/header at end of mbuf */
				if (m && totlen + off + max_linkhdr <= MLEN) {
					m->m_data += max_linkhdr;
					len -= max_linkhdr;
				}
			}
			if (m == NULL)
				return NULL;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = totlen;
		} else {
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, 0);
				len = MCLBYTES;
			} else {
				m = m_get(M_DONTWAIT, MT_DATA);
				len = MLEN;
			}
			if (m == NULL) {
				m_freem(top);
				return NULL;
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			bzero(mtod(n, caddr_t), MLEN);
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

/*
 * Append the specified data to the indicated mbuf chain,
 * extending the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}
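
/*
 * Example (sketch): appending a fixed-size trailer "trailer" to a
 * packet, growing the chain as needed.  A zero return may leave a
 * partial append behind.
 */
#if 0
	if (m_append(m, sizeof(trailer), (c_caddr_t)&trailer) == 0)
		return (ENOBUFS);
#endif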

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, u_int), void *arg)
{
	u_int count;
	int rval;

	KASSERT(off >= 0, ("m_apply, negative off %d", off));
	KASSERT(len >= 0, ("m_apply, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_apply, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);
		len -= count;
		off = 0;
		m = m->m_next;
	}
	return (0);
}
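
/*
 * Example (sketch): walking a chain without linearizing it, here
 * XOR-ing all payload bytes through a callback.
 */
#if 0
static int
xor_bytes(void *arg, void *data, u_int len)
{
	u_char *sum = arg, *p = data;
	u_int i;

	for (i = 0; i < len; i++)
		*sum ^= p[i];
	return (0);		/* non-zero would abort the walk */
}

	u_char sum = 0;

	(void)m_apply(m, 0, m->m_pkthdr.len, xor_bytes, &sum);
#endif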

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search. */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;
			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data. */
					*off = m->m_len;
					return (m);
				}
				return (NULL);
			}
			m = m->m_next;
		}
	}
	return (NULL);
}

void
m_print(const struct mbuf *m, int maxlen)
{
	int len;
	int pdata;
	const struct mbuf *m2;

	if (m->m_flags & M_PKTHDR)
		len = m->m_pkthdr.len;
	else
		len = -1;
	m2 = m;
	while (m2 != NULL && (len == -1 || len)) {
		pdata = m2->m_len;
		if (maxlen != -1 && pdata > maxlen)
			pdata = maxlen;
		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
		    m2->m_next, m2->m_flags, "\20\20freelist\17skipfw"
		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
		if (pdata)
			printf(", %*D\n", m2->m_len, (u_char *)m2->m_data, "-");
		if (len != -1)
			len -= m2->m_len;
		m2 = m2->m_next;
	}
	if (len > 0)
		printf("%d bytes unaccounted for.\n", len);
	return;
}

u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}

u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If an mbuf without a packet header is passed in, the original
 * mbuf (chain) will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	MBUF_CHECKSLEEP(how);
	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
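
/*
 * Example (sketch): compacting a long chain, e.g. for a hypothetical
 * NIC that supports only a limited number of DMA segments.
 */
#if 0
	struct mbuf *d;

	d = m_defrag(m, M_DONTWAIT);
	if (d == NULL)
		return (ENOBUFS);	/* "m" is unchanged */
	m = d;				/* the original chain was freed */
#endif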

#ifdef MBUF_STRESS_TEST

/*
 * Fragment an mbuf chain.  There's no reason you'd ever want to do
 * this in normal usage, but it's great for stress testing various
 * mbuf consumers.
 *
 * If fragmentation is not possible, the original chain will be
 * returned.
 *
 * Possible length values:
 * 0	 no fragmentation will occur
 * > 0	each fragment will be of the specified length
 * -1	each fragment will be the same random value in length
 * -2	each fragment's length will be entirely random
 * (Random values range from 1 to 256)
 */
struct mbuf *
m_fragment(struct mbuf *m0, int how, int length)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	if ((length == 0) || (length < -2))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

	m_final = m_getcl(how, MT_DATA, M_PKTHDR);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	if (length == -1)
		length = 1 + (arc4random() & 255);

	while (progress < m0->m_pkthdr.len) {
		int fraglen;

		if (length > 0)
			fraglen = length;
		else
			fraglen = 1 + (arc4random() & 255);
		if (fraglen > m0->m_pkthdr.len - progress)
			fraglen = m0->m_pkthdr.len - progress;

		if (fraglen > MCLBYTES)
			fraglen = MCLBYTES;

		if (m_new == NULL) {
			m_new = m_getcl(how, MT_DATA, 0);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
		progress += fraglen;
		m_new->m_len = fraglen;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_freem(m0);
	m0 = m_final;
	return (m0);
nospace:
	if (m_final)
		m_freem(m_final);
	/* Return the original chain on failure */
	return (m0);
}

#endif

/*
 * Copy the contents of a uio into a new mbuf chain, reserving "align"
 * bytes of leading space in the first mbuf.
 */
struct mbuf *
m_uiotombuf(struct uio *uio, int how, int len, int align)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, error = 0, length, total;

	if (len > 0)
		total = min(uio->uio_resid, len);
	else
		total = uio->uio_resid;
	if (align >= MHLEN)
		goto nospace;
	if (total + align > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);
	if (m_final == NULL)
		goto nospace;
	m_final->m_data += align;
	m_new = m_final;
	while (progress < total) {
		length = total - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;
		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}
		error = uiomove(mtod(m_new, void *), length, uio);
		if (error)
			goto nospace;
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_fixhdr(m_final);
	return (m_final);
nospace:
	/* Don't free m_new twice when it is still the chain head. */
	if (m_new != NULL && m_new != m_final)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
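
/*
 * Example (sketch): copying a userland write into a fresh chain while
 * reserving max_hdr bytes of leading space for headers; assumes
 * max_hdr < MHLEN, as m_uiotombuf() requires.
 */
#if 0
	struct mbuf *m;

	m = m_uiotombuf(uio, M_TRYWAIT, 0, max_hdr);
	if (m == NULL)
		return (ENOBUFS);
#endif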

/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;

	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}