/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 116182 2003-06-11 00:56:59Z obrien $");

#include "opt_mac.h"
#include "opt_param.h"
#include "opt_mbuf_stress_test.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>

int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
#ifdef MBUF_STRESS_TEST
int	m_defragrandomfailures;
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0, "");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
	KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster"));
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{

#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with MGETHDR. Many users
	 * (e.g. m_copy*, m_prepend) use MGET and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip.  For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
#ifdef MAC
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}

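/*
 * Usage sketch (illustrative only): m_move_pkthdr() transfers ownership
 * of the pkthdr and its tag chain, clearing M_PKTHDR in "from", and
 * cannot fail; m_dup_pkthdr() leaves "from" intact and deep-copies the
 * tags, so its return value must be checked.  A hypothetical caller
 * building a new header mbuf "n" from an existing packet "m" might do:
 *
 *	MGETHDR(n, M_DONTWAIT, m->m_type);
 *	if (n == NULL)
 *		return (NULL);
 *	if (!m_dup_pkthdr(n, m, M_DONTWAIT)) {
 *		m_free(n);
 *		return (NULL);
 *	}
 */
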
/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

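/*
 * Usage sketch (illustrative only): callers normally go through the
 * M_PREPEND() macro, which falls back to m_prepend() only when there is
 * no leading space in the first mbuf.  Prepending a hypothetical 8-byte
 * encapsulation header "hdr" might look like:
 *
 *	M_PREPEND(m, 8, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	bcopy(&hdr, mtod(m, caddr_t), 8);
 *
 * Note that on failure the original chain has already been freed.
 */
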
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		mbstat.m_mcfail++;	/* XXX: No consistency. */

	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

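/*
 * Usage sketch (illustrative only): TCP-style retransmission keeps the
 * original chain and transmits a reference-counted copy, e.g.
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *
 * The copy shares any clusters with "m", so it must be treated as
 * read-only; use m_dup() below when a writable copy is required.
 */
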
/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

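/*
 * Usage sketch (illustrative only): unlike m_pullup(), m_copydata()
 * never reshapes the chain, so it is a safe way to peek at a header
 * that may be split across mbufs.  For a hypothetical protocol header
 * "struct foohdr fh":
 *
 *	if (m->m_pkthdr.len < sizeof(fh))
 *		return (EINVAL);
 *	m_copydata(m, 0, sizeof(fh), (caddr_t)&fh);
 */
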
/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	M_ASSERTPKTHDR(m);

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how))
				goto nospace;
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

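/*
 * Usage sketch (illustrative only): code that needs to modify payload
 * bytes in place cannot use the reference-counted copies made by
 * m_copym()/m_copypacket(); a conservative approach is to take a deep
 * copy of the whole chain first:
 *
 *	n = m_dup(m, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *	m_freem(m);
 *	m = n;
 */
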
/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

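/*
 * Trim "req_len" bytes of data from the chain: from the head if req_len
 * is positive, from the tail if it is negative.  If the first mbuf has
 * a packet header, its length is adjusted as well.
 */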
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

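/*
 * Usage sketch (illustrative only): stripping a link-level header from
 * the front of a received frame and a 4-byte FCS from the end:
 *
 *	m_adj(m, sizeof(struct ether_header));
 *	m_adj(m, -ETHER_CRC_LEN);
 */
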
/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}

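/*
 * Usage sketch (illustrative only): the classic input-path idiom makes a
 * protocol header contiguous before casting it, remembering that the
 * chain pointer may change and that the chain is freed on failure:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */
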
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
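
/*
 * Usage sketch (illustrative only): a protocol that frames fixed-size
 * records can peel one record off the front of a chain; "m" keeps the
 * first "reclen" bytes and "tail" receives the rest:
 *
 *	tail = m_split(m, reclen, M_DONTWAIT);
 *	if (tail == NULL)
 *		return (ENOBUFS);
 *
 * (reclen is a hypothetical record length known to be <= the chain size.)
 */
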
/*
 * Routine to copy from device local memory into mbufs.
 * Note that the `off' argument is the offset into the first mbuf of the
 * target chain at which to begin placing the data.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
	 void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}
		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

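/*
 * Usage sketch (illustrative only): a driver whose hardware leaves a
 * received frame in a board-local buffer could hand it to the stack
 * with something like the following, where "sc->rx_buf" and "pktlen"
 * are hypothetical softc fields:
 *
 *	m = m_devget(sc->rx_buf, pktlen, 0, ifp, NULL);
 *	if (m == NULL)
 *		return;
 *
 * Passing NULL for "copy" makes m_devget() use bcopy(); a driver may
 * instead supply its own routine for bus-specific copies.
 */
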
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get_clrd(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

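/*
 * Usage sketch (illustrative only): overwriting a 2-byte checksum field
 * at a known offset in an already-built packet:
 *
 *	m_copyback(m, ckoff, sizeof(sum), (caddr_t)&sum);
 *
 * ("ckoff" and "sum" are hypothetical.)  Note that extension mbufs are
 * allocated with M_DONTWAIT, so a copyback that would need to grow the
 * chain can silently fall short if allocation fails.
 */

/*
 * Debugging aid: print each mbuf in a packet header chain along with a
 * hex dump of its data.
 */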
void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}

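/*
 * Recompute the packet header length from the data actually present in
 * the chain and store it in m_pkthdr.len, returning the new value.
 */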
u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}

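/*
 * Return the number of data bytes in the chain.  If "last" is non-NULL,
 * also return a pointer to the last mbuf through it.
 */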
u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

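/*
 * Usage sketch (illustrative only): code that trims or extends mbufs by
 * hand can resynchronize the packet header afterwards and find the tail
 * of a chain for appending ("plen", "last" and "extra" are hypothetical
 * locals):
 *
 *	plen = m_fixhdr(m);
 *	m_length(m, &last);
 *	last->m_next = extra;
 */
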
/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a non-packet header is passed in, the original
 * mbuf (chain?) will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf	*m_new = NULL, *m_final = NULL;
	int		progress = 0, length;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	if (m0->m_next == NULL)
		m_defraguseless++;
	m_freem(m0);
	m0 = m_final;
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
	return (m0);
nospace:
	m_defragfailure++;
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	return (NULL);
}

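/*
 * Usage sketch (illustrative only): a network driver whose DMA engine
 * handles only a few scatter/gather segments might compact a long
 * transmit chain before mapping it ("m_new" is a hypothetical local):
 *
 *	m_new = m_defrag(m, M_DONTWAIT);
 *	if (m_new == NULL) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 *	m = m_new;
 *
 * On failure the original chain is left untouched, so the caller decides
 * whether to free it or try again later; on success the original chain
 * has already been freed by m_defrag() itself.
 */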