uipc_mbuf.c revision 193309
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 193309 2009-06-02 15:59:46Z rwatson $");
34
35#include "opt_mac.h"
36#include "opt_param.h"
37#include "opt_mbuf_stress_test.h"
38#include "opt_mbuf_profiling.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/limits.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/sysctl.h>
48#include <sys/domain.h>
49#include <sys/protosw.h>
50#include <sys/uio.h>
51
52int	max_linkhdr;
53int	max_protohdr;
54int	max_hdr;
55int	max_datalen;
56#ifdef MBUF_STRESS_TEST
57int	m_defragpackets;
58int	m_defragbytes;
59int	m_defraguseless;
60int	m_defragfailure;
61int	m_defragrandomfailures;
62#endif
63
64/*
65 * sysctl(8) exported objects
66 */
67SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD,
68	   &max_linkhdr, 0, "Size of largest link layer header");
69SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD,
70	   &max_protohdr, 0, "Size of largest protocol layer header");
71SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD,
72	   &max_hdr, 0, "Size of largest link plus protocol header");
73SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD,
74	   &max_datalen, 0, "Minimum space left in mbuf after max_hdr");
75#ifdef MBUF_STRESS_TEST
76SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
77	   &m_defragpackets, 0, "");
78SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
79	   &m_defragbytes, 0, "");
80SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
81	   &m_defraguseless, 0, "");
82SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
83	   &m_defragfailure, 0, "");
84SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
85	   &m_defragrandomfailures, 0, "");
86#endif
87
88/*
89 * Allocate a given length worth of mbufs and/or clusters (whatever fits
90 * best) and return a pointer to the top of the allocated chain.  If an
91 * existing mbuf chain is provided, then we will append the new chain
92 * to the existing one but still return the top of the newly allocated
93 * chain.
94 */
95struct mbuf *
96m_getm2(struct mbuf *m, int len, int how, short type, int flags)
97{
98	struct mbuf *mb, *nm = NULL, *mtail = NULL;
99
100	KASSERT(len >= 0, ("%s: len is < 0", __func__));
101
102	/* Validate flags. */
103	flags &= (M_PKTHDR | M_EOR);
104
105	/* Packet header mbuf must be first in chain. */
106	if ((flags & M_PKTHDR) && m != NULL)
107		flags &= ~M_PKTHDR;
108
109	/* Loop and append maximum sized mbufs to the chain tail. */
110	while (len > 0) {
111		if (len > MCLBYTES)
112			mb = m_getjcl(how, type, (flags & M_PKTHDR),
113			    MJUMPAGESIZE);
114		else if (len >= MINCLSIZE)
115			mb = m_getcl(how, type, (flags & M_PKTHDR));
116		else if (flags & M_PKTHDR)
117			mb = m_gethdr(how, type);
118		else
119			mb = m_get(how, type);
120
121		/* Fail the whole operation if one mbuf can't be allocated. */
122		if (mb == NULL) {
123			if (nm != NULL)
124				m_freem(nm);
125			return (NULL);
126		}
127
128		/* Book keeping. */
129		len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size :
130			((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN);
131		if (mtail != NULL)
132			mtail->m_next = mb;
133		else
134			nm = mb;
135		mtail = mb;
136		flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
137	}
138	if (flags & M_EOR)
139		mtail->m_flags |= M_EOR;  /* Only valid on the last mbuf. */
140
141	/* If mbuf was supplied, append new chain to the end of it. */
142	if (m != NULL) {
143		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
144			;
145		mtail->m_next = nm;
146		mtail->m_flags &= ~M_EOR;
147	} else
148		m = nm;
149
150	return (m);
151}
152
153/*
154 * Free an entire chain of mbufs and associated external buffers, if
155 * applicable.
156 */
157void
158m_freem(struct mbuf *mb)
159{
160
161	while (mb != NULL)
162		mb = m_free(mb);
163}
164
165/*-
166 * Configure a provided mbuf to refer to the provided external storage
167 * buffer and setup a reference count for said buffer.  If the setting
168 * up of the reference count fails, the M_EXT bit will not be set.  If
169 * successfull, the M_EXT bit is set in the mbuf's flags.
170 *
171 * Arguments:
172 *    mb     The existing mbuf to which to attach the provided buffer.
173 *    buf    The address of the provided external storage buffer.
174 *    size   The size of the provided buffer.
175 *    freef  A pointer to a routine that is responsible for freeing the
176 *           provided external storage buffer.
177 *    args   A pointer to an argument structure (of any type) to be passed
178 *           to the provided freef routine (may be NULL).
179 *    flags  Any other flags to be passed to the provided mbuf.
180 *    type   The type that the external storage buffer should be
181 *           labeled with.
182 *
183 * Returns:
184 *    Nothing.
185 */
186void
187m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
188    void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type)
189{
190	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
191
192	if (type != EXT_EXTREF)
193		mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
194	if (mb->m_ext.ref_cnt != NULL) {
195		*(mb->m_ext.ref_cnt) = 1;
196		mb->m_flags |= (M_EXT | flags);
197		mb->m_ext.ext_buf = buf;
198		mb->m_data = mb->m_ext.ext_buf;
199		mb->m_ext.ext_size = size;
200		mb->m_ext.ext_free = freef;
201		mb->m_ext.ext_arg1 = arg1;
202		mb->m_ext.ext_arg2 = arg2;
203		mb->m_ext.ext_type = type;
204        }
205}
206
207/*
208 * Non-directly-exported function to clean up after mbufs with M_EXT
209 * storage attached to them if the reference count hits 1.
210 */
211void
212mb_free_ext(struct mbuf *m)
213{
214	int skipmbuf;
215
216	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
217	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
218
219
220	/*
221	 * check if the header is embedded in the cluster
222	 */
223	skipmbuf = (m->m_flags & M_NOFREE);
224
225	/* Free attached storage if this mbuf is the only reference to it. */
226	if (*(m->m_ext.ref_cnt) == 1 ||
227	    atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
228		switch (m->m_ext.ext_type) {
229		case EXT_PACKET:	/* The packet zone is special. */
230			if (*(m->m_ext.ref_cnt) == 0)
231				*(m->m_ext.ref_cnt) = 1;
232			uma_zfree(zone_pack, m);
233			return;		/* Job done. */
234		case EXT_CLUSTER:
235			uma_zfree(zone_clust, m->m_ext.ext_buf);
236			break;
237		case EXT_JUMBOP:
238			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
239			break;
240		case EXT_JUMBO9:
241			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
242			break;
243		case EXT_JUMBO16:
244			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
245			break;
246		case EXT_SFBUF:
247		case EXT_NET_DRV:
248		case EXT_MOD_TYPE:
249		case EXT_DISPOSABLE:
250			*(m->m_ext.ref_cnt) = 0;
251			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
252				m->m_ext.ref_cnt));
253			/* FALLTHROUGH */
254		case EXT_EXTREF:
255			KASSERT(m->m_ext.ext_free != NULL,
256				("%s: ext_free not set", __func__));
257			(*(m->m_ext.ext_free))(m->m_ext.ext_arg1,
258			    m->m_ext.ext_arg2);
259			break;
260		default:
261			KASSERT(m->m_ext.ext_type == 0,
262				("%s: unknown ext_type", __func__));
263		}
264	}
265	if (skipmbuf)
266		return;
267
268	/*
269	 * Free this mbuf back to the mbuf zone with all m_ext
270	 * information purged.
271	 */
272	m->m_ext.ext_buf = NULL;
273	m->m_ext.ext_free = NULL;
274	m->m_ext.ext_arg1 = NULL;
275	m->m_ext.ext_arg2 = NULL;
276	m->m_ext.ref_cnt = NULL;
277	m->m_ext.ext_size = 0;
278	m->m_ext.ext_type = 0;
279	m->m_flags &= ~M_EXT;
280	uma_zfree(zone_mbuf, m);
281}
282
283/*
284 * Attach the the cluster from *m to *n, set up m_ext in *n
285 * and bump the refcount of the cluster.
286 */
287static void
288mb_dupcl(struct mbuf *n, struct mbuf *m)
289{
290	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
291	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
292	KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
293
294	if (*(m->m_ext.ref_cnt) == 1)
295		*(m->m_ext.ref_cnt) += 1;
296	else
297		atomic_add_int(m->m_ext.ref_cnt, 1);
298	n->m_ext.ext_buf = m->m_ext.ext_buf;
299	n->m_ext.ext_free = m->m_ext.ext_free;
300	n->m_ext.ext_arg1 = m->m_ext.ext_arg1;
301	n->m_ext.ext_arg2 = m->m_ext.ext_arg2;
302	n->m_ext.ext_size = m->m_ext.ext_size;
303	n->m_ext.ref_cnt = m->m_ext.ref_cnt;
304	n->m_ext.ext_type = m->m_ext.ext_type;
305	n->m_flags |= M_EXT;
306}
307
308/*
309 * Clean up mbuf (chain) from any tags and packet headers.
310 * If "all" is set then the first mbuf in the chain will be
311 * cleaned too.
312 */
313void
314m_demote(struct mbuf *m0, int all)
315{
316	struct mbuf *m;
317
318	for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
319		if (m->m_flags & M_PKTHDR) {
320			m_tag_delete_chain(m, NULL);
321			m->m_flags &= ~M_PKTHDR;
322			bzero(&m->m_pkthdr, sizeof(struct pkthdr));
323		}
324		if (m->m_type == MT_HEADER)
325			m->m_type = MT_DATA;
326		if (m != m0 && m->m_nextpkt != NULL)
327			m->m_nextpkt = NULL;
328		m->m_flags = m->m_flags & (M_EXT|M_EOR|M_RDONLY|M_FREELIST);
329	}
330}
331
332/*
333 * Sanity checks on mbuf (chain) for use in KASSERT() and general
334 * debugging.
335 * Returns 0 or panics when bad and 1 on all tests passed.
336 * Sanitize, 0 to run M_SANITY_ACTION, 1 to garble things so they
337 * blow up later.
338 */
339int
340m_sanity(struct mbuf *m0, int sanitize)
341{
342	struct mbuf *m;
343	caddr_t a, b;
344	int pktlen = 0;
345
346#ifdef INVARIANTS
347#define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m)
348#else
349#define	M_SANITY_ACTION(s)	printf("mbuf %p: " s, m)
350#endif
351
352	for (m = m0; m != NULL; m = m->m_next) {
353		/*
354		 * Basic pointer checks.  If any of these fails then some
355		 * unrelated kernel memory before or after us is trashed.
356		 * No way to recover from that.
357		 */
358		a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf :
359			((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) :
360			 (caddr_t)(&m->m_dat)) );
361		b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size :
362			((m->m_flags & M_PKTHDR) ? MHLEN : MLEN)));
363		if ((caddr_t)m->m_data < a)
364			M_SANITY_ACTION("m_data outside mbuf data range left");
365		if ((caddr_t)m->m_data > b)
366			M_SANITY_ACTION("m_data outside mbuf data range right");
367		if ((caddr_t)m->m_data + m->m_len > b)
368			M_SANITY_ACTION("m_data + m_len exeeds mbuf space");
369		if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.header) {
370			if ((caddr_t)m->m_pkthdr.header < a ||
371			    (caddr_t)m->m_pkthdr.header > b)
372				M_SANITY_ACTION("m_pkthdr.header outside mbuf data range");
373		}
374
375		/* m->m_nextpkt may only be set on first mbuf in chain. */
376		if (m != m0 && m->m_nextpkt != NULL) {
377			if (sanitize) {
378				m_freem(m->m_nextpkt);
379				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
380			} else
381				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
382		}
383
384		/* packet length (not mbuf length!) calculation */
385		if (m0->m_flags & M_PKTHDR)
386			pktlen += m->m_len;
387
388		/* m_tags may only be attached to first mbuf in chain. */
389		if (m != m0 && m->m_flags & M_PKTHDR &&
390		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
391			if (sanitize) {
392				m_tag_delete_chain(m, NULL);
393				/* put in 0xDEADC0DE perhaps? */
394			} else
395				M_SANITY_ACTION("m_tags on in-chain mbuf");
396		}
397
398		/* M_PKTHDR may only be set on first mbuf in chain */
399		if (m != m0 && m->m_flags & M_PKTHDR) {
400			if (sanitize) {
401				bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
402				m->m_flags &= ~M_PKTHDR;
403				/* put in 0xDEADCODE and leave hdr flag in */
404			} else
405				M_SANITY_ACTION("M_PKTHDR on in-chain mbuf");
406		}
407	}
408	m = m0;
409	if (pktlen && pktlen != m->m_pkthdr.len) {
410		if (sanitize)
411			m->m_pkthdr.len = 0;
412		else
413			M_SANITY_ACTION("m_pkthdr.len != mbuf chain length");
414	}
415	return 1;
416
417#undef	M_SANITY_ACTION
418}
419
420
421/*
422 * "Move" mbuf pkthdr from "from" to "to".
423 * "from" must have M_PKTHDR set, and "to" must be empty.
424 */
425void
426m_move_pkthdr(struct mbuf *to, struct mbuf *from)
427{
428
429#if 0
430	/* see below for why these are not enabled */
431	M_ASSERTPKTHDR(to);
432	/* Note: with MAC, this may not be a good assertion. */
433	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
434	    ("m_move_pkthdr: to has tags"));
435#endif
436#ifdef MAC
437	/*
438	 * XXXMAC: It could be this should also occur for non-MAC?
439	 */
440	if (to->m_flags & M_PKTHDR)
441		m_tag_delete_chain(to, NULL);
442#endif
443	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
444	if ((to->m_flags & M_EXT) == 0)
445		to->m_data = to->m_pktdat;
446	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
447	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
448	from->m_flags &= ~M_PKTHDR;
449}
450
451/*
452 * Duplicate "from"'s mbuf pkthdr in "to".
453 * "from" must have M_PKTHDR set, and "to" must be empty.
454 * In particular, this does a deep copy of the packet tags.
455 */
456int
457m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
458{
459
460#if 0
461	/*
462	 * The mbuf allocator only initializes the pkthdr
463	 * when the mbuf is allocated with MGETHDR. Many users
464	 * (e.g. m_copy*, m_prepend) use MGET and then
465	 * smash the pkthdr as needed causing these
466	 * assertions to trip.  For now just disable them.
467	 */
468	M_ASSERTPKTHDR(to);
469	/* Note: with MAC, this may not be a good assertion. */
470	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
471#endif
472	MBUF_CHECKSLEEP(how);
473#ifdef MAC
474	if (to->m_flags & M_PKTHDR)
475		m_tag_delete_chain(to, NULL);
476#endif
477	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
478	if ((to->m_flags & M_EXT) == 0)
479		to->m_data = to->m_pktdat;
480	to->m_pkthdr = from->m_pkthdr;
481	SLIST_INIT(&to->m_pkthdr.tags);
482	return (m_tag_copy_chain(to, from, MBTOM(how)));
483}
484
485/*
486 * Lesser-used path for M_PREPEND:
487 * allocate new mbuf to prepend to chain,
488 * copy junk along.
489 */
490struct mbuf *
491m_prepend(struct mbuf *m, int len, int how)
492{
493	struct mbuf *mn;
494
495	if (m->m_flags & M_PKTHDR)
496		MGETHDR(mn, how, m->m_type);
497	else
498		MGET(mn, how, m->m_type);
499	if (mn == NULL) {
500		m_freem(m);
501		return (NULL);
502	}
503	if (m->m_flags & M_PKTHDR)
504		M_MOVE_PKTHDR(mn, m);
505	mn->m_next = m;
506	m = mn;
507	if(m->m_flags & M_PKTHDR) {
508		if (len < MHLEN)
509			MH_ALIGN(m, len);
510	} else {
511		if (len < MLEN)
512			M_ALIGN(m, len);
513	}
514	m->m_len = len;
515	return (m);
516}
517
518/*
519 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
520 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
521 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
522 * Note that the copy is read-only, because clusters are not copied,
523 * only their reference counts are incremented.
524 */
525struct mbuf *
526m_copym(struct mbuf *m, int off0, int len, int wait)
527{
528	struct mbuf *n, **np;
529	int off = off0;
530	struct mbuf *top;
531	int copyhdr = 0;
532
533	KASSERT(off >= 0, ("m_copym, negative off %d", off));
534	KASSERT(len >= 0, ("m_copym, negative len %d", len));
535	MBUF_CHECKSLEEP(wait);
536	if (off == 0 && m->m_flags & M_PKTHDR)
537		copyhdr = 1;
538	while (off > 0) {
539		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
540		if (off < m->m_len)
541			break;
542		off -= m->m_len;
543		m = m->m_next;
544	}
545	np = &top;
546	top = 0;
547	while (len > 0) {
548		if (m == NULL) {
549			KASSERT(len == M_COPYALL,
550			    ("m_copym, length > size of mbuf chain"));
551			break;
552		}
553		if (copyhdr)
554			MGETHDR(n, wait, m->m_type);
555		else
556			MGET(n, wait, m->m_type);
557		*np = n;
558		if (n == NULL)
559			goto nospace;
560		if (copyhdr) {
561			if (!m_dup_pkthdr(n, m, wait))
562				goto nospace;
563			if (len == M_COPYALL)
564				n->m_pkthdr.len -= off0;
565			else
566				n->m_pkthdr.len = len;
567			copyhdr = 0;
568		}
569		n->m_len = min(len, m->m_len - off);
570		if (m->m_flags & M_EXT) {
571			n->m_data = m->m_data + off;
572			mb_dupcl(n, m);
573		} else
574			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
575			    (u_int)n->m_len);
576		if (len != M_COPYALL)
577			len -= n->m_len;
578		off = 0;
579		m = m->m_next;
580		np = &n->m_next;
581	}
582	if (top == NULL)
583		mbstat.m_mcfail++;	/* XXX: No consistency. */
584
585	return (top);
586nospace:
587	m_freem(top);
588	mbstat.m_mcfail++;	/* XXX: No consistency. */
589	return (NULL);
590}
591
592/*
593 * Returns mbuf chain with new head for the prepending case.
594 * Copies from mbuf (chain) n from off for len to mbuf (chain) m
595 * either prepending or appending the data.
596 * The resulting mbuf (chain) m is fully writeable.
597 * m is destination (is made writeable)
598 * n is source, off is offset in source, len is len from offset
599 * dir, 0 append, 1 prepend
600 * how, wait or nowait
601 */
602
/*
 * bcopy() wrapped in the int-returning callback shape that m_apply() is
 * given in m_copymdata(): copy "len" bytes from "s" to "t" and report 0.
 */
static int
m_bcopyxxx(void *s, void *t, u_int len)
{
	size_t nbytes = (size_t)len;

	bcopy(s, t, nbytes);
	return (0);
}
609
610struct mbuf *
611m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
612    int prep, int how)
613{
614	struct mbuf *mm, *x, *z, *prev = NULL;
615	caddr_t p;
616	int i, nlen = 0;
617	caddr_t buf[MLEN];
618
619	KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source"));
620	KASSERT(off >= 0, ("m_copymdata, negative off %d", off));
621	KASSERT(len >= 0, ("m_copymdata, negative len %d", len));
622	KASSERT(prep == 0 || prep == 1, ("m_copymdata, unknown direction %d", prep));
623
624	mm = m;
625	if (!prep) {
626		while(mm->m_next) {
627			prev = mm;
628			mm = mm->m_next;
629		}
630	}
631	for (z = n; z != NULL; z = z->m_next)
632		nlen += z->m_len;
633	if (len == M_COPYALL)
634		len = nlen - off;
635	if (off + len > nlen || len < 1)
636		return NULL;
637
638	if (!M_WRITABLE(mm)) {
639		/* XXX: Use proper m_xxx function instead. */
640		x = m_getcl(how, MT_DATA, mm->m_flags);
641		if (x == NULL)
642			return NULL;
643		bcopy(mm->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size);
644		p = x->m_ext.ext_buf + (mm->m_data - mm->m_ext.ext_buf);
645		x->m_data = p;
646		mm->m_next = NULL;
647		if (mm != m)
648			prev->m_next = x;
649		m_free(mm);
650		mm = x;
651	}
652
653	/*
654	 * Append/prepend the data.  Allocating mbufs as necessary.
655	 */
656	/* Shortcut if enough free space in first/last mbuf. */
657	if (!prep && M_TRAILINGSPACE(mm) >= len) {
658		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t) +
659			 mm->m_len);
660		mm->m_len += len;
661		mm->m_pkthdr.len += len;
662		return m;
663	}
664	if (prep && M_LEADINGSPACE(mm) >= len) {
665		mm->m_data = mtod(mm, caddr_t) - len;
666		m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t));
667		mm->m_len += len;
668		mm->m_pkthdr.len += len;
669		return mm;
670	}
671
672	/* Expand first/last mbuf to cluster if possible. */
673	if (!prep && !(mm->m_flags & M_EXT) && len > M_TRAILINGSPACE(mm)) {
674		bcopy(mm->m_data, &buf, mm->m_len);
675		m_clget(mm, how);
676		if (!(mm->m_flags & M_EXT))
677			return NULL;
678		bcopy(&buf, mm->m_ext.ext_buf, mm->m_len);
679		mm->m_data = mm->m_ext.ext_buf;
680		mm->m_pkthdr.header = NULL;
681	}
682	if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) {
683		bcopy(mm->m_data, &buf, mm->m_len);
684		m_clget(mm, how);
685		if (!(mm->m_flags & M_EXT))
686			return NULL;
687		bcopy(&buf, (caddr_t *)mm->m_ext.ext_buf +
688		       mm->m_ext.ext_size - mm->m_len, mm->m_len);
689		mm->m_data = (caddr_t)mm->m_ext.ext_buf +
690			      mm->m_ext.ext_size - mm->m_len;
691		mm->m_pkthdr.header = NULL;
692	}
693
694	/* Append/prepend as many mbuf (clusters) as necessary to fit len. */
695	if (!prep && len > M_TRAILINGSPACE(mm)) {
696		if (!m_getm(mm, len - M_TRAILINGSPACE(mm), how, MT_DATA))
697			return NULL;
698	}
699	if (prep && len > M_LEADINGSPACE(mm)) {
700		if (!(z = m_getm(NULL, len - M_LEADINGSPACE(mm), how, MT_DATA)))
701			return NULL;
702		i = 0;
703		for (x = z; x != NULL; x = x->m_next) {
704			i += x->m_flags & M_EXT ? x->m_ext.ext_size :
705			      (x->m_flags & M_PKTHDR ? MHLEN : MLEN);
706			if (!x->m_next)
707				break;
708		}
709		z->m_data += i - len;
710		m_move_pkthdr(mm, z);
711		x->m_next = mm;
712		mm = z;
713	}
714
715	/* Seek to start position in source mbuf. Optimization for long chains. */
716	while (off > 0) {
717		if (off < n->m_len)
718			break;
719		off -= n->m_len;
720		n = n->m_next;
721	}
722
723	/* Copy data into target mbuf. */
724	z = mm;
725	while (len > 0) {
726		KASSERT(z != NULL, ("m_copymdata, falling off target edge"));
727		i = M_TRAILINGSPACE(z);
728		m_apply(n, off, i, m_bcopyxxx, mtod(z, caddr_t) + z->m_len);
729		z->m_len += i;
730		/* fixup pkthdr.len if necessary */
731		if ((prep ? mm : m)->m_flags & M_PKTHDR)
732			(prep ? mm : m)->m_pkthdr.len += i;
733		off += i;
734		len -= i;
735		z = z->m_next;
736	}
737	return (prep ? mm : m);
738}
739
740/*
741 * Copy an entire packet, including header (which must be present).
742 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
743 * Note that the copy is read-only, because clusters are not copied,
744 * only their reference counts are incremented.
745 * Preserve alignment of the first mbuf so if the creator has left
746 * some room at the beginning (e.g. for inserting protocol headers)
747 * the copies still have the room available.
748 */
749struct mbuf *
750m_copypacket(struct mbuf *m, int how)
751{
752	struct mbuf *top, *n, *o;
753
754	MBUF_CHECKSLEEP(how);
755	MGET(n, how, m->m_type);
756	top = n;
757	if (n == NULL)
758		goto nospace;
759
760	if (!m_dup_pkthdr(n, m, how))
761		goto nospace;
762	n->m_len = m->m_len;
763	if (m->m_flags & M_EXT) {
764		n->m_data = m->m_data;
765		mb_dupcl(n, m);
766	} else {
767		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
768		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
769	}
770
771	m = m->m_next;
772	while (m) {
773		MGET(o, how, m->m_type);
774		if (o == NULL)
775			goto nospace;
776
777		n->m_next = o;
778		n = n->m_next;
779
780		n->m_len = m->m_len;
781		if (m->m_flags & M_EXT) {
782			n->m_data = m->m_data;
783			mb_dupcl(n, m);
784		} else {
785			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
786		}
787
788		m = m->m_next;
789	}
790	return top;
791nospace:
792	m_freem(top);
793	mbstat.m_mcfail++;	/* XXX: No consistency. */
794	return (NULL);
795}
796
797/*
798 * Copy data from an mbuf chain starting "off" bytes from the beginning,
799 * continuing for "len" bytes, into the indicated buffer.
800 */
801void
802m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
803{
804	u_int count;
805
806	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
807	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
808	while (off > 0) {
809		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
810		if (off < m->m_len)
811			break;
812		off -= m->m_len;
813		m = m->m_next;
814	}
815	while (len > 0) {
816		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
817		count = min(m->m_len - off, len);
818		bcopy(mtod(m, caddr_t) + off, cp, count);
819		len -= count;
820		cp += count;
821		off = 0;
822		m = m->m_next;
823	}
824}
825
826/*
827 * Copy a packet header mbuf chain into a completely new chain, including
828 * copying any mbuf clusters.  Use this instead of m_copypacket() when
829 * you need a writable copy of an mbuf chain.
830 */
831struct mbuf *
832m_dup(struct mbuf *m, int how)
833{
834	struct mbuf **p, *top = NULL;
835	int remain, moff, nsize;
836
837	MBUF_CHECKSLEEP(how);
838	/* Sanity check */
839	if (m == NULL)
840		return (NULL);
841	M_ASSERTPKTHDR(m);
842
843	/* While there's more data, get a new mbuf, tack it on, and fill it */
844	remain = m->m_pkthdr.len;
845	moff = 0;
846	p = &top;
847	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
848		struct mbuf *n;
849
850		/* Get the next new mbuf */
851		if (remain >= MINCLSIZE) {
852			n = m_getcl(how, m->m_type, 0);
853			nsize = MCLBYTES;
854		} else {
855			n = m_get(how, m->m_type);
856			nsize = MLEN;
857		}
858		if (n == NULL)
859			goto nospace;
860
861		if (top == NULL) {		/* First one, must be PKTHDR */
862			if (!m_dup_pkthdr(n, m, how)) {
863				m_free(n);
864				goto nospace;
865			}
866			if ((n->m_flags & M_EXT) == 0)
867				nsize = MHLEN;
868		}
869		n->m_len = 0;
870
871		/* Link it into the new chain */
872		*p = n;
873		p = &n->m_next;
874
875		/* Copy data from original mbuf(s) into new mbuf */
876		while (n->m_len < nsize && m != NULL) {
877			int chunk = min(nsize - n->m_len, m->m_len - moff);
878
879			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
880			moff += chunk;
881			n->m_len += chunk;
882			remain -= chunk;
883			if (moff == m->m_len) {
884				m = m->m_next;
885				moff = 0;
886			}
887		}
888
889		/* Check correct total mbuf length */
890		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
891		    	("%s: bogus m_pkthdr.len", __func__));
892	}
893	return (top);
894
895nospace:
896	m_freem(top);
897	mbstat.m_mcfail++;	/* XXX: No consistency. */
898	return (NULL);
899}
900
901/*
902 * Concatenate mbuf chain n to m.
903 * Both chains must be of the same type (e.g. MT_DATA).
904 * Any m_pkthdr is not updated.
905 */
906void
907m_cat(struct mbuf *m, struct mbuf *n)
908{
909	while (m->m_next)
910		m = m->m_next;
911	while (n) {
912		if (m->m_flags & M_EXT ||
913		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
914			/* just join the two chains */
915			m->m_next = n;
916			return;
917		}
918		/* splat the data from one into the other */
919		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
920		    (u_int)n->m_len);
921		m->m_len += n->m_len;
922		n = m_free(n);
923	}
924}
925
926void
927m_adj(struct mbuf *mp, int req_len)
928{
929	int len = req_len;
930	struct mbuf *m;
931	int count;
932
933	if ((m = mp) == NULL)
934		return;
935	if (len >= 0) {
936		/*
937		 * Trim from head.
938		 */
939		while (m != NULL && len > 0) {
940			if (m->m_len <= len) {
941				len -= m->m_len;
942				m->m_len = 0;
943				m = m->m_next;
944			} else {
945				m->m_len -= len;
946				m->m_data += len;
947				len = 0;
948			}
949		}
950		m = mp;
951		if (mp->m_flags & M_PKTHDR)
952			m->m_pkthdr.len -= (req_len - len);
953	} else {
954		/*
955		 * Trim from tail.  Scan the mbuf chain,
956		 * calculating its length and finding the last mbuf.
957		 * If the adjustment only affects this mbuf, then just
958		 * adjust and return.  Otherwise, rescan and truncate
959		 * after the remaining size.
960		 */
961		len = -len;
962		count = 0;
963		for (;;) {
964			count += m->m_len;
965			if (m->m_next == (struct mbuf *)0)
966				break;
967			m = m->m_next;
968		}
969		if (m->m_len >= len) {
970			m->m_len -= len;
971			if (mp->m_flags & M_PKTHDR)
972				mp->m_pkthdr.len -= len;
973			return;
974		}
975		count -= len;
976		if (count < 0)
977			count = 0;
978		/*
979		 * Correct length for chain is "count".
980		 * Find the mbuf with last data, adjust its length,
981		 * and toss data from remaining mbufs on chain.
982		 */
983		m = mp;
984		if (m->m_flags & M_PKTHDR)
985			m->m_pkthdr.len = count;
986		for (; m; m = m->m_next) {
987			if (m->m_len >= count) {
988				m->m_len = count;
989				if (m->m_next != NULL) {
990					m_freem(m->m_next);
991					m->m_next = NULL;
992				}
993				break;
994			}
995			count -= m->m_len;
996		}
997	}
998}
999
1000/*
1001 * Rearange an mbuf chain so that len bytes are contiguous
1002 * and in the data area of an mbuf (so that mtod and dtom
1003 * will work for a structure of size len).  Returns the resulting
1004 * mbuf chain on success, frees it and returns null on failure.
1005 * If there is room, it will add up to max_protohdr-len extra bytes to the
1006 * contiguous region in an attempt to avoid being called next time.
1007 */
1008struct mbuf *
1009m_pullup(struct mbuf *n, int len)
1010{
1011	struct mbuf *m;
1012	int count;
1013	int space;
1014
1015	/*
1016	 * If first mbuf has no cluster, and has room for len bytes
1017	 * without shifting current data, pullup into it,
1018	 * otherwise allocate a new mbuf to prepend to the chain.
1019	 */
1020	if ((n->m_flags & M_EXT) == 0 &&
1021	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
1022		if (n->m_len >= len)
1023			return (n);
1024		m = n;
1025		n = n->m_next;
1026		len -= m->m_len;
1027	} else {
1028		if (len > MHLEN)
1029			goto bad;
1030		MGET(m, M_DONTWAIT, n->m_type);
1031		if (m == NULL)
1032			goto bad;
1033		m->m_len = 0;
1034		if (n->m_flags & M_PKTHDR)
1035			M_MOVE_PKTHDR(m, n);
1036	}
1037	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1038	do {
1039		count = min(min(max(len, max_protohdr), space), n->m_len);
1040		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
1041		  (u_int)count);
1042		len -= count;
1043		m->m_len += count;
1044		n->m_len -= count;
1045		space -= count;
1046		if (n->m_len)
1047			n->m_data += count;
1048		else
1049			n = m_free(n);
1050	} while (len > 0 && n);
1051	if (len > 0) {
1052		(void) m_free(m);
1053		goto bad;
1054	}
1055	m->m_next = n;
1056	return (m);
1057bad:
1058	m_freem(n);
1059	mbstat.m_mpfail++;	/* XXX: No consistency. */
1060	return (NULL);
1061}
1062
1063/*
1064 * Like m_pullup(), except a new mbuf is always allocated, and we allow
1065 * the amount of empty space before the data in the new mbuf to be specified
1066 * (in the event that the caller expects to prepend later).
1067 */
1068int MSFail;
1069
1070struct mbuf *
1071m_copyup(struct mbuf *n, int len, int dstoff)
1072{
1073	struct mbuf *m;
1074	int count, space;
1075
1076	if (len > (MHLEN - dstoff))
1077		goto bad;
1078	MGET(m, M_DONTWAIT, n->m_type);
1079	if (m == NULL)
1080		goto bad;
1081	m->m_len = 0;
1082	if (n->m_flags & M_PKTHDR)
1083		M_MOVE_PKTHDR(m, n);
1084	m->m_data += dstoff;
1085	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1086	do {
1087		count = min(min(max(len, max_protohdr), space), n->m_len);
1088		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
1089		    (unsigned)count);
1090		len -= count;
1091		m->m_len += count;
1092		n->m_len -= count;
1093		space -= count;
1094		if (n->m_len)
1095			n->m_data += count;
1096		else
1097			n = m_free(n);
1098	} while (len > 0 && n);
1099	if (len > 0) {
1100		(void) m_free(m);
1101		goto bad;
1102	}
1103	m->m_next = n;
1104	return (m);
1105 bad:
1106	m_freem(n);
1107	MSFail++;
1108	return (NULL);
1109}
1110
1111/*
1112 * Partition an mbuf chain in two pieces, returning the tail --
1113 * all but the first len0 bytes.  In case of failure, it returns NULL and
1114 * attempts to restore the chain to its original state.
1115 *
1116 * Note that the resulting mbufs might be read-only, because the new
1117 * mbuf can end up sharing an mbuf cluster with the original mbuf if
1118 * the "breaking point" happens to lie within a cluster mbuf. Use the
1119 * M_WRITABLE() macro to check for this case.
1120 */
1121struct mbuf *
1122m_split(struct mbuf *m0, int len0, int wait)
1123{
1124	struct mbuf *m, *n;
1125	u_int len = len0, remain;
1126
1127	MBUF_CHECKSLEEP(wait);
1128	for (m = m0; m && len > m->m_len; m = m->m_next)
1129		len -= m->m_len;
1130	if (m == NULL)
1131		return (NULL);
1132	remain = m->m_len - len;
1133	if (m0->m_flags & M_PKTHDR) {
1134		MGETHDR(n, wait, m0->m_type);
1135		if (n == NULL)
1136			return (NULL);
1137		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
1138		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
1139		m0->m_pkthdr.len = len0;
1140		if (m->m_flags & M_EXT)
1141			goto extpacket;
1142		if (remain > MHLEN) {
1143			/* m can't be the lead packet */
1144			MH_ALIGN(n, 0);
1145			n->m_next = m_split(m, len, wait);
1146			if (n->m_next == NULL) {
1147				(void) m_free(n);
1148				return (NULL);
1149			} else {
1150				n->m_len = 0;
1151				return (n);
1152			}
1153		} else
1154			MH_ALIGN(n, remain);
1155	} else if (remain == 0) {
1156		n = m->m_next;
1157		m->m_next = NULL;
1158		return (n);
1159	} else {
1160		MGET(n, wait, m->m_type);
1161		if (n == NULL)
1162			return (NULL);
1163		M_ALIGN(n, remain);
1164	}
1165extpacket:
1166	if (m->m_flags & M_EXT) {
1167		n->m_data = m->m_data + len;
1168		mb_dupcl(n, m);
1169	} else {
1170		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
1171	}
1172	n->m_len = remain;
1173	m->m_len = len;
1174	n->m_next = m->m_next;
1175	m->m_next = NULL;
1176	return (n);
1177}
1178/*
1179 * Routine to copy from device local memory into mbufs.
1180 * Note that `off' argument is offset into first mbuf of target chain from
1181 * which to begin copying the data to.
1182 */
1183struct mbuf *
1184m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
1185    void (*copy)(char *from, caddr_t to, u_int len))
1186{
1187	struct mbuf *m;
1188	struct mbuf *top = NULL, **mp = &top;
1189	int len;
1190
1191	if (off < 0 || off > MHLEN)
1192		return (NULL);
1193
1194	while (totlen > 0) {
1195		if (top == NULL) {	/* First one, must be PKTHDR */
1196			if (totlen + off >= MINCLSIZE) {
1197				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
1198				len = MCLBYTES;
1199			} else {
1200				m = m_gethdr(M_DONTWAIT, MT_DATA);
1201				len = MHLEN;
1202
1203				/* Place initial small packet/header at end of mbuf */
1204				if (m && totlen + off + max_linkhdr <= MLEN) {
1205					m->m_data += max_linkhdr;
1206					len -= max_linkhdr;
1207				}
1208			}
1209			if (m == NULL)
1210				return NULL;
1211			m->m_pkthdr.rcvif = ifp;
1212			m->m_pkthdr.len = totlen;
1213		} else {
1214			if (totlen + off >= MINCLSIZE) {
1215				m = m_getcl(M_DONTWAIT, MT_DATA, 0);
1216				len = MCLBYTES;
1217			} else {
1218				m = m_get(M_DONTWAIT, MT_DATA);
1219				len = MLEN;
1220			}
1221			if (m == NULL) {
1222				m_freem(top);
1223				return NULL;
1224			}
1225		}
1226		if (off) {
1227			m->m_data += off;
1228			len -= off;
1229			off = 0;
1230		}
1231		m->m_len = len = min(totlen, len);
1232		if (copy)
1233			copy(buf, mtod(m, caddr_t), (u_int)len);
1234		else
1235			bcopy(buf, mtod(m, caddr_t), (u_int)len);
1236		buf += len;
1237		*mp = m;
1238		mp = &m->m_next;
1239		totlen -= len;
1240	}
1241	return (top);
1242}
1243
1244/*
1245 * Copy data from a buffer back into the indicated mbuf chain,
1246 * starting "off" bytes from the beginning, extending the mbuf
1247 * chain if necessary.
1248 */
1249void
1250m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
1251{
1252	int mlen;
1253	struct mbuf *m = m0, *n;
1254	int totlen = 0;
1255
1256	if (m0 == NULL)
1257		return;
1258	while (off > (mlen = m->m_len)) {
1259		off -= mlen;
1260		totlen += mlen;
1261		if (m->m_next == NULL) {
1262			n = m_get(M_DONTWAIT, m->m_type);
1263			if (n == NULL)
1264				goto out;
1265			bzero(mtod(n, caddr_t), MLEN);
1266			n->m_len = min(MLEN, len + off);
1267			m->m_next = n;
1268		}
1269		m = m->m_next;
1270	}
1271	while (len > 0) {
1272		if (m->m_next == NULL && (len > m->m_len - off)) {
1273			m->m_len += min(len - (m->m_len - off),
1274			    M_TRAILINGSPACE(m));
1275		}
1276		mlen = min (m->m_len - off, len);
1277		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
1278		cp += mlen;
1279		len -= mlen;
1280		mlen += off;
1281		off = 0;
1282		totlen += mlen;
1283		if (len == 0)
1284			break;
1285		if (m->m_next == NULL) {
1286			n = m_get(M_DONTWAIT, m->m_type);
1287			if (n == NULL)
1288				break;
1289			n->m_len = min(MLEN, len);
1290			m->m_next = n;
1291		}
1292		m = m->m_next;
1293	}
1294out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
1295		m->m_pkthdr.len = totlen;
1296}
1297
1298/*
1299 * Append the specified data to the indicated mbuf chain,
1300 * Extend the mbuf chain if the new data does not fit in
1301 * existing space.
1302 *
1303 * Return 1 if able to complete the job; otherwise 0.
1304 */
1305int
1306m_append(struct mbuf *m0, int len, c_caddr_t cp)
1307{
1308	struct mbuf *m, *n;
1309	int remainder, space;
1310
1311	for (m = m0; m->m_next != NULL; m = m->m_next)
1312		;
1313	remainder = len;
1314	space = M_TRAILINGSPACE(m);
1315	if (space > 0) {
1316		/*
1317		 * Copy into available space.
1318		 */
1319		if (space > remainder)
1320			space = remainder;
1321		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
1322		m->m_len += space;
1323		cp += space, remainder -= space;
1324	}
1325	while (remainder > 0) {
1326		/*
1327		 * Allocate a new mbuf; could check space
1328		 * and allocate a cluster instead.
1329		 */
1330		n = m_get(M_DONTWAIT, m->m_type);
1331		if (n == NULL)
1332			break;
1333		n->m_len = min(MLEN, remainder);
1334		bcopy(cp, mtod(n, caddr_t), n->m_len);
1335		cp += n->m_len, remainder -= n->m_len;
1336		m->m_next = n;
1337		m = n;
1338	}
1339	if (m0->m_flags & M_PKTHDR)
1340		m0->m_pkthdr.len += len - remainder;
1341	return (remainder == 0);
1342}
1343
1344/*
1345 * Apply function f to the data in an mbuf chain starting "off" bytes from
1346 * the beginning, continuing for "len" bytes.
1347 */
1348int
1349m_apply(struct mbuf *m, int off, int len,
1350    int (*f)(void *, void *, u_int), void *arg)
1351{
1352	u_int count;
1353	int rval;
1354
1355	KASSERT(off >= 0, ("m_apply, negative off %d", off));
1356	KASSERT(len >= 0, ("m_apply, negative len %d", len));
1357	while (off > 0) {
1358		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
1359		if (off < m->m_len)
1360			break;
1361		off -= m->m_len;
1362		m = m->m_next;
1363	}
1364	while (len > 0) {
1365		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
1366		count = min(m->m_len - off, len);
1367		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
1368		if (rval)
1369			return (rval);
1370		len -= count;
1371		off = 0;
1372		m = m->m_next;
1373	}
1374	return (0);
1375}
1376
1377/*
1378 * Return a pointer to mbuf/offset of location in mbuf chain.
1379 */
1380struct mbuf *
1381m_getptr(struct mbuf *m, int loc, int *off)
1382{
1383
1384	while (loc >= 0) {
1385		/* Normal end of search. */
1386		if (m->m_len > loc) {
1387			*off = loc;
1388			return (m);
1389		} else {
1390			loc -= m->m_len;
1391			if (m->m_next == NULL) {
1392				if (loc == 0) {
1393					/* Point at the end of valid data. */
1394					*off = m->m_len;
1395					return (m);
1396				}
1397				return (NULL);
1398			}
1399			m = m->m_next;
1400		}
1401	}
1402	return (NULL);
1403}
1404
1405void
1406m_print(const struct mbuf *m, int maxlen)
1407{
1408	int len;
1409	int pdata;
1410	const struct mbuf *m2;
1411
1412	if (m->m_flags & M_PKTHDR)
1413		len = m->m_pkthdr.len;
1414	else
1415		len = -1;
1416	m2 = m;
1417	while (m2 != NULL && (len == -1 || len)) {
1418		pdata = m2->m_len;
1419		if (maxlen != -1 && pdata > maxlen)
1420			pdata = maxlen;
1421		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
1422		    m2->m_next, m2->m_flags, "\20\20freelist\17skipfw"
1423		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
1424		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
1425		if (pdata)
1426			printf(", %*D\n", pdata, (u_char *)m2->m_data, "-");
1427		if (len != -1)
1428			len -= m2->m_len;
1429		m2 = m2->m_next;
1430	}
1431	if (len > 0)
1432		printf("%d bytes unaccounted for.\n", len);
1433	return;
1434}
1435
1436u_int
1437m_fixhdr(struct mbuf *m0)
1438{
1439	u_int len;
1440
1441	len = m_length(m0, NULL);
1442	m0->m_pkthdr.len = len;
1443	return (len);
1444}
1445
1446u_int
1447m_length(struct mbuf *m0, struct mbuf **last)
1448{
1449	struct mbuf *m;
1450	u_int len;
1451
1452	len = 0;
1453	for (m = m0; m != NULL; m = m->m_next) {
1454		len += m->m_len;
1455		if (m->m_next == NULL)
1456			break;
1457	}
1458	if (last != NULL)
1459		*last = m;
1460	return (len);
1461}
1462
1463/*
1464 * Defragment a mbuf chain, returning the shortest possible
1465 * chain of mbufs and clusters.  If allocation fails and
1466 * this cannot be completed, NULL will be returned, but
1467 * the passed in chain will be unchanged.  Upon success,
1468 * the original chain will be freed, and the new chain
1469 * will be returned.
1470 *
1471 * If a non-packet header is passed in, the original
1472 * mbuf (chain?) will be returned unharmed.
1473 */
1474struct mbuf *
1475m_defrag(struct mbuf *m0, int how)
1476{
1477	struct mbuf *m_new = NULL, *m_final = NULL;
1478	int progress = 0, length;
1479
1480	MBUF_CHECKSLEEP(how);
1481	if (!(m0->m_flags & M_PKTHDR))
1482		return (m0);
1483
1484	m_fixhdr(m0); /* Needed sanity check */
1485
1486#ifdef MBUF_STRESS_TEST
1487	if (m_defragrandomfailures) {
1488		int temp = arc4random() & 0xff;
1489		if (temp == 0xba)
1490			goto nospace;
1491	}
1492#endif
1493
1494	if (m0->m_pkthdr.len > MHLEN)
1495		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
1496	else
1497		m_final = m_gethdr(how, MT_DATA);
1498
1499	if (m_final == NULL)
1500		goto nospace;
1501
1502	if (m_dup_pkthdr(m_final, m0, how) == 0)
1503		goto nospace;
1504
1505	m_new = m_final;
1506
1507	while (progress < m0->m_pkthdr.len) {
1508		length = m0->m_pkthdr.len - progress;
1509		if (length > MCLBYTES)
1510			length = MCLBYTES;
1511
1512		if (m_new == NULL) {
1513			if (length > MLEN)
1514				m_new = m_getcl(how, MT_DATA, 0);
1515			else
1516				m_new = m_get(how, MT_DATA);
1517			if (m_new == NULL)
1518				goto nospace;
1519		}
1520
1521		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
1522		progress += length;
1523		m_new->m_len = length;
1524		if (m_new != m_final)
1525			m_cat(m_final, m_new);
1526		m_new = NULL;
1527	}
1528#ifdef MBUF_STRESS_TEST
1529	if (m0->m_next == NULL)
1530		m_defraguseless++;
1531#endif
1532	m_freem(m0);
1533	m0 = m_final;
1534#ifdef MBUF_STRESS_TEST
1535	m_defragpackets++;
1536	m_defragbytes += m0->m_pkthdr.len;
1537#endif
1538	return (m0);
1539nospace:
1540#ifdef MBUF_STRESS_TEST
1541	m_defragfailure++;
1542#endif
1543	if (m_final)
1544		m_freem(m_final);
1545	return (NULL);
1546}
1547
1548/*
1549 * Defragment an mbuf chain, returning at most maxfrags separate
1550 * mbufs+clusters.  If this is not possible NULL is returned and
1551 * the original mbuf chain is left in it's present (potentially
1552 * modified) state.  We use two techniques: collapsing consecutive
1553 * mbufs and replacing consecutive mbufs by a cluster.
1554 *
1555 * NB: this should really be named m_defrag but that name is taken
1556 */
1557struct mbuf *
1558m_collapse(struct mbuf *m0, int how, int maxfrags)
1559{
1560	struct mbuf *m, *n, *n2, **prev;
1561	u_int curfrags;
1562
1563	/*
1564	 * Calculate the current number of frags.
1565	 */
1566	curfrags = 0;
1567	for (m = m0; m != NULL; m = m->m_next)
1568		curfrags++;
1569	/*
1570	 * First, try to collapse mbufs.  Note that we always collapse
1571	 * towards the front so we don't need to deal with moving the
1572	 * pkthdr.  This may be suboptimal if the first mbuf has much
1573	 * less data than the following.
1574	 */
1575	m = m0;
1576again:
1577	for (;;) {
1578		n = m->m_next;
1579		if (n == NULL)
1580			break;
1581		if ((m->m_flags & M_RDONLY) == 0 &&
1582		    n->m_len < M_TRAILINGSPACE(m)) {
1583			bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
1584				n->m_len);
1585			m->m_len += n->m_len;
1586			m->m_next = n->m_next;
1587			m_free(n);
1588			if (--curfrags <= maxfrags)
1589				return m0;
1590		} else
1591			m = n;
1592	}
1593	KASSERT(maxfrags > 1,
1594		("maxfrags %u, but normal collapse failed", maxfrags));
1595	/*
1596	 * Collapse consecutive mbufs to a cluster.
1597	 */
1598	prev = &m0->m_next;		/* NB: not the first mbuf */
1599	while ((n = *prev) != NULL) {
1600		if ((n2 = n->m_next) != NULL &&
1601		    n->m_len + n2->m_len < MCLBYTES) {
1602			m = m_getcl(how, MT_DATA, 0);
1603			if (m == NULL)
1604				goto bad;
1605			bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
1606			bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
1607				n2->m_len);
1608			m->m_len = n->m_len + n2->m_len;
1609			m->m_next = n2->m_next;
1610			*prev = m;
1611			m_free(n);
1612			m_free(n2);
1613			if (--curfrags <= maxfrags)	/* +1 cl -2 mbufs */
1614				return m0;
1615			/*
1616			 * Still not there, try the normal collapse
1617			 * again before we allocate another cluster.
1618			 */
1619			goto again;
1620		}
1621		prev = &n->m_next;
1622	}
1623	/*
1624	 * No place where we can collapse to a cluster; punt.
1625	 * This can occur if, for example, you request 2 frags
1626	 * but the packet requires that both be clusters (we
1627	 * never reallocate the first mbuf to avoid moving the
1628	 * packet header).
1629	 */
1630bad:
1631	return NULL;
1632}
1633
1634#ifdef MBUF_STRESS_TEST
1635
1636/*
1637 * Fragment an mbuf chain.  There's no reason you'd ever want to do
1638 * this in normal usage, but it's great for stress testing various
1639 * mbuf consumers.
1640 *
1641 * If fragmentation is not possible, the original chain will be
1642 * returned.
1643 *
1644 * Possible length values:
1645 * 0	 no fragmentation will occur
1646 * > 0	each fragment will be of the specified length
1647 * -1	each fragment will be the same random value in length
1648 * -2	each fragment's length will be entirely random
1649 * (Random values range from 1 to 256)
1650 */
1651struct mbuf *
1652m_fragment(struct mbuf *m0, int how, int length)
1653{
1654	struct mbuf *m_new = NULL, *m_final = NULL;
1655	int progress = 0;
1656
1657	if (!(m0->m_flags & M_PKTHDR))
1658		return (m0);
1659
1660	if ((length == 0) || (length < -2))
1661		return (m0);
1662
1663	m_fixhdr(m0); /* Needed sanity check */
1664
1665	m_final = m_getcl(how, MT_DATA, M_PKTHDR);
1666
1667	if (m_final == NULL)
1668		goto nospace;
1669
1670	if (m_dup_pkthdr(m_final, m0, how) == 0)
1671		goto nospace;
1672
1673	m_new = m_final;
1674
1675	if (length == -1)
1676		length = 1 + (arc4random() & 255);
1677
1678	while (progress < m0->m_pkthdr.len) {
1679		int fraglen;
1680
1681		if (length > 0)
1682			fraglen = length;
1683		else
1684			fraglen = 1 + (arc4random() & 255);
1685		if (fraglen > m0->m_pkthdr.len - progress)
1686			fraglen = m0->m_pkthdr.len - progress;
1687
1688		if (fraglen > MCLBYTES)
1689			fraglen = MCLBYTES;
1690
1691		if (m_new == NULL) {
1692			m_new = m_getcl(how, MT_DATA, 0);
1693			if (m_new == NULL)
1694				goto nospace;
1695		}
1696
1697		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
1698		progress += fraglen;
1699		m_new->m_len = fraglen;
1700		if (m_new != m_final)
1701			m_cat(m_final, m_new);
1702		m_new = NULL;
1703	}
1704	m_freem(m0);
1705	m0 = m_final;
1706	return (m0);
1707nospace:
1708	if (m_final)
1709		m_freem(m_final);
1710	/* Return the original chain on failure */
1711	return (m0);
1712}
1713
1714#endif
1715
1716/*
1717 * Copy the contents of uio into a properly sized mbuf chain.
1718 */
1719struct mbuf *
1720m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
1721{
1722	struct mbuf *m, *mb;
1723	int error, length, total;
1724	int progress = 0;
1725
1726	/*
1727	 * len can be zero or an arbitrary large value bound by
1728	 * the total data supplied by the uio.
1729	 */
1730	if (len > 0)
1731		total = min(uio->uio_resid, len);
1732	else
1733		total = uio->uio_resid;
1734
1735	/*
1736	 * The smallest unit returned by m_getm2() is a single mbuf
1737	 * with pkthdr.  We can't align past it.
1738	 */
1739	if (align >= MHLEN)
1740		return (NULL);
1741
1742	/*
1743	 * Give us the full allocation or nothing.
1744	 * If len is zero return the smallest empty mbuf.
1745	 */
1746	m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags);
1747	if (m == NULL)
1748		return (NULL);
1749	m->m_data += align;
1750
1751	/* Fill all mbufs with uio data and update header information. */
1752	for (mb = m; mb != NULL; mb = mb->m_next) {
1753		length = min(M_TRAILINGSPACE(mb), total - progress);
1754
1755		error = uiomove(mtod(mb, void *), length, uio);
1756		if (error) {
1757			m_freem(m);
1758			return (NULL);
1759		}
1760
1761		mb->m_len = length;
1762		progress += length;
1763		if (flags & M_PKTHDR)
1764			m->m_pkthdr.len += length;
1765	}
1766	KASSERT(progress == total, ("%s: progress != total", __func__));
1767
1768	return (m);
1769}
1770
1771/*
1772 * Set the m_data pointer of a newly-allocated mbuf
1773 * to place an object of the specified size at the
1774 * end of the mbuf, longword aligned.
1775 */
1776void
1777m_align(struct mbuf *m, int len)
1778{
1779	int adjust;
1780
1781	if (m->m_flags & M_EXT)
1782		adjust = m->m_ext.ext_size - len;
1783	else if (m->m_flags & M_PKTHDR)
1784		adjust = MHLEN - len;
1785	else
1786		adjust = MLEN - len;
1787	m->m_data += adjust &~ (sizeof(long)-1);
1788}
1789
1790/*
1791 * Create a writable copy of the mbuf chain.  While doing this
1792 * we compact the chain with a goal of producing a chain with
1793 * at most two mbufs.  The second mbuf in this chain is likely
1794 * to be a cluster.  The primary purpose of this work is to create
1795 * a writable packet for encryption, compression, etc.  The
1796 * secondary goal is to linearize the data so the data can be
1797 * passed to crypto hardware in the most efficient manner possible.
1798 */
1799struct mbuf *
1800m_unshare(struct mbuf *m0, int how)
1801{
1802	struct mbuf *m, *mprev;
1803	struct mbuf *n, *mfirst, *mlast;
1804	int len, off;
1805
1806	mprev = NULL;
1807	for (m = m0; m != NULL; m = mprev->m_next) {
1808		/*
1809		 * Regular mbufs are ignored unless there's a cluster
1810		 * in front of it that we can use to coalesce.  We do
1811		 * the latter mainly so later clusters can be coalesced
1812		 * also w/o having to handle them specially (i.e. convert
1813		 * mbuf+cluster -> cluster).  This optimization is heavily
1814		 * influenced by the assumption that we're running over
1815		 * Ethernet where MCLBYTES is large enough that the max
1816		 * packet size will permit lots of coalescing into a
1817		 * single cluster.  This in turn permits efficient
1818		 * crypto operations, especially when using hardware.
1819		 */
1820		if ((m->m_flags & M_EXT) == 0) {
1821			if (mprev && (mprev->m_flags & M_EXT) &&
1822			    m->m_len <= M_TRAILINGSPACE(mprev)) {
1823				/* XXX: this ignores mbuf types */
1824				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
1825				       mtod(m, caddr_t), m->m_len);
1826				mprev->m_len += m->m_len;
1827				mprev->m_next = m->m_next;	/* unlink from chain */
1828				m_free(m);			/* reclaim mbuf */
1829#if 0
1830				newipsecstat.ips_mbcoalesced++;
1831#endif
1832			} else {
1833				mprev = m;
1834			}
1835			continue;
1836		}
1837		/*
1838		 * Writable mbufs are left alone (for now).
1839		 */
1840		if (M_WRITABLE(m)) {
1841			mprev = m;
1842			continue;
1843		}
1844
1845		/*
1846		 * Not writable, replace with a copy or coalesce with
1847		 * the previous mbuf if possible (since we have to copy
1848		 * it anyway, we try to reduce the number of mbufs and
1849		 * clusters so that future work is easier).
1850		 */
1851		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
1852		/* NB: we only coalesce into a cluster or larger */
1853		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
1854		    m->m_len <= M_TRAILINGSPACE(mprev)) {
1855			/* XXX: this ignores mbuf types */
1856			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
1857			       mtod(m, caddr_t), m->m_len);
1858			mprev->m_len += m->m_len;
1859			mprev->m_next = m->m_next;	/* unlink from chain */
1860			m_free(m);			/* reclaim mbuf */
1861#if 0
1862			newipsecstat.ips_clcoalesced++;
1863#endif
1864			continue;
1865		}
1866
1867		/*
1868		 * Allocate new space to hold the copy...
1869		 */
1870		/* XXX why can M_PKTHDR be set past the first mbuf? */
1871		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
1872			/*
1873			 * NB: if a packet header is present we must
1874			 * allocate the mbuf separately from any cluster
1875			 * because M_MOVE_PKTHDR will smash the data
1876			 * pointer and drop the M_EXT marker.
1877			 */
1878			MGETHDR(n, how, m->m_type);
1879			if (n == NULL) {
1880				m_freem(m0);
1881				return (NULL);
1882			}
1883			M_MOVE_PKTHDR(n, m);
1884			MCLGET(n, how);
1885			if ((n->m_flags & M_EXT) == 0) {
1886				m_free(n);
1887				m_freem(m0);
1888				return (NULL);
1889			}
1890		} else {
1891			n = m_getcl(how, m->m_type, m->m_flags);
1892			if (n == NULL) {
1893				m_freem(m0);
1894				return (NULL);
1895			}
1896		}
1897		/*
1898		 * ... and copy the data.  We deal with jumbo mbufs
1899		 * (i.e. m_len > MCLBYTES) by splitting them into
1900		 * clusters.  We could just malloc a buffer and make
1901		 * it external but too many device drivers don't know
1902		 * how to break up the non-contiguous memory when
1903		 * doing DMA.
1904		 */
1905		len = m->m_len;
1906		off = 0;
1907		mfirst = n;
1908		mlast = NULL;
1909		for (;;) {
1910			int cc = min(len, MCLBYTES);
1911			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
1912			n->m_len = cc;
1913			if (mlast != NULL)
1914				mlast->m_next = n;
1915			mlast = n;
1916#if 0
1917			newipsecstat.ips_clcopied++;
1918#endif
1919
1920			len -= cc;
1921			if (len <= 0)
1922				break;
1923			off += cc;
1924
1925			n = m_getcl(how, m->m_type, m->m_flags);
1926			if (n == NULL) {
1927				m_freem(mfirst);
1928				m_freem(m0);
1929				return (NULL);
1930			}
1931		}
1932		n->m_next = m->m_next;
1933		if (mprev == NULL)
1934			m0 = mfirst;		/* new head of chain */
1935		else
1936			mprev->m_next = mfirst;	/* replace old mbuf */
1937		m_free(m);			/* release old mbuf */
1938		mprev = mfirst;
1939	}
1940	return (m0);
1941}
1942
1943#ifdef MBUF_PROFILING
1944
/*
 * Number of histogram buckets per statistic.  Do not change this
 * casually: the bucket indexing and the text formatting code depend on it.
 */
#define MP_BUCKETS 32

/* Histograms collected by m_profile(), one counter per bucket. */
struct mbufprofile {
	uintmax_t wasted[MP_BUCKETS];
	uintmax_t used[MP_BUCKETS];
	uintmax_t segments[MP_BUCKETS];
};

struct mbufprofile mbprof;

/*
 * Sizing of the text buffer used to report the histograms.
 * A counter of up to 64 bits prints as at most 20 decimal digits, so 21
 * leaves one to spare (this assumes uintmax_t is no wider than 64 bits).
 */
#define MP_MAXDIGITS 21
#define MP_NUMLINES 6		/* rows of counters */
#define MP_NUMSPERLINE 16	/* counters per row */
#define MP_EXTRABYTES 64	/* > strlen("used:\nwasted:\nsegments:\n") */
/* Worst case row: every counter at full width plus one separator each. */
#define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE)
/* All rows, the terminating NUL and some slack for the headings. */
#define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES)

char mbprofbuf[MP_BUFSIZE];
1961
1962void
1963m_profile(struct mbuf *m)
1964{
1965	int segments = 0;
1966	int used = 0;
1967	int wasted = 0;
1968
1969	while (m) {
1970		segments++;
1971		used += m->m_len;
1972		if (m->m_flags & M_EXT) {
1973			wasted += MHLEN - sizeof(m->m_ext) +
1974			    m->m_ext.ext_size - m->m_len;
1975		} else {
1976			if (m->m_flags & M_PKTHDR)
1977				wasted += MHLEN - m->m_len;
1978			else
1979				wasted += MLEN - m->m_len;
1980		}
1981		m = m->m_next;
1982	}
1983	/* be paranoid.. it helps */
1984	if (segments > MP_BUCKETS - 1)
1985		segments = MP_BUCKETS - 1;
1986	if (used > 100000)
1987		used = 100000;
1988	if (wasted > 100000)
1989		wasted = 100000;
1990	/* store in the appropriate bucket */
1991	/* don't bother locking. if it's slightly off, so what? */
1992	mbprof.segments[segments]++;
1993	mbprof.used[fls(used)]++;
1994	mbprof.wasted[fls(wasted)]++;
1995}
1996
1997static void
1998mbprof_textify(void)
1999{
2000	int offset;
2001	char *c;
2002	u_int64_t *p;
2003
2004
2005	p = &mbprof.wasted[0];
2006	c = mbprofbuf;
2007	offset = snprintf(c, MP_MAXLINE + 10,
2008	    "wasted:\n"
2009	    "%ju %ju %ju %ju %ju %ju %ju %ju "
2010	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
2011	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
2012	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
2013#ifdef BIG_ARRAY
2014	p = &mbprof.wasted[16];
2015	c += offset;
2016	offset = snprintf(c, MP_MAXLINE,
2017	    "%ju %ju %ju %ju %ju %ju %ju %ju "
2018	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
2019	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
2020	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
2021#endif
2022	p = &mbprof.used[0];
2023	c += offset;
2024	offset = snprintf(c, MP_MAXLINE + 10,
2025	    "used:\n"
2026	    "%ju %ju %ju %ju %ju %ju %ju %ju "
2027	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
2028	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
2029	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
2030#ifdef BIG_ARRAY
2031	p = &mbprof.used[16];
2032	c += offset;
2033	offset = snprintf(c, MP_MAXLINE,
2034	    "%ju %ju %ju %ju %ju %ju %ju %ju "
2035	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
2036	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
2037	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
2038#endif
2039	p = &mbprof.segments[0];
2040	c += offset;
2041	offset = snprintf(c, MP_MAXLINE + 10,
2042	    "segments:\n"
2043	    "%ju %ju %ju %ju %ju %ju %ju %ju "
2044	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
2045	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
2046	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
2047#ifdef BIG_ARRAY
2048	p = &mbprof.segments[16];
2049	c += offset;
2050	offset = snprintf(c, MP_MAXLINE,
2051	    "%ju %ju %ju %ju %ju %ju %ju %ju "
2052	    "%ju %ju %ju %ju %ju %ju %ju %jju",
2053	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
2054	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
2055#endif
2056}
2057
2058static int
2059mbprof_handler(SYSCTL_HANDLER_ARGS)
2060{
2061	int error;
2062
2063	mbprof_textify();
2064	error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1);
2065	return (error);
2066}
2067
2068static int
2069mbprof_clr_handler(SYSCTL_HANDLER_ARGS)
2070{
2071	int clear, error;
2072
2073	clear = 0;
2074	error = sysctl_handle_int(oidp, &clear, 0, req);
2075	if (error || !req->newptr)
2076		return (error);
2077
2078	if (clear) {
2079		bzero(&mbprof, sizeof(mbprof));
2080	}
2081
2082	return (error);
2083}
2084
2085
2086SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD,
2087	    NULL, 0, mbprof_handler, "A", "mbuf profiling statistics");
2088
2089SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW,
2090	    NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics");
2091#endif
2092
2093