uipc_mbuf.c revision 63203
1/*
2 * Copyright (c) 1982, 1986, 1988, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34 * $FreeBSD: head/sys/kern/uipc_mbuf.c 63203 2000-07-15 06:02:48Z alfred $
35 */
36
37#include "opt_param.h"
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/kernel.h>
43#include <sys/sysctl.h>
44#include <sys/domain.h>
45#include <sys/protosw.h>
46
47#include <vm/vm.h>
48#include <vm/vm_kern.h>
49#include <vm/vm_extern.h>
50
51#ifdef INVARIANTS
52#include <machine/cpu.h>
53#endif
54
55static void mbinit __P((void *));
56SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
57
58struct mbuf *mbutl;
59char	*mclrefcnt;
60struct mbstat mbstat;
61u_long	mbtypes[MT_NTYPES];
62struct mbuf *mmbfree;
63union mcluster *mclfree;
64int	max_linkhdr;
65int	max_protohdr;
66int	max_hdr;
67int	max_datalen;
68int	nmbclusters;
69int	nmbufs;
70u_int	m_mballoc_wid = 0;
71u_int	m_clalloc_wid = 0;
72
73SYSCTL_DECL(_kern_ipc);
74SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
75	   &max_linkhdr, 0, "");
76SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
77	   &max_protohdr, 0, "");
78SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
79SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
80	   &max_datalen, 0, "");
81SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
82	   &mbuf_wait, 0, "");
83SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, "");
84SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
85	   sizeof(mbtypes), "LU", "");
86SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
87	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
88SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
89	   "Maximum number of mbufs available");
90#ifndef NMBCLUSTERS
91#define NMBCLUSTERS	(512 + MAXUSERS * 16)
92#endif
93TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
94TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
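
/*
 * The default above can be overridden at kernel configuration time with
 * "options NMBCLUSTERS=..." (delivered through opt_param.h), and both
 * values can also be set from the boot loader environment thanks to the
 * TUNABLE_INT_DECL() hooks, e.g. with a hypothetical /boot/loader.conf
 * entry such as:
 *
 *	kern.ipc.nmbclusters="8192"
 */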
95
96static void	m_reclaim __P((void));
97
98/* "number of clusters of pages" */
99#define NCL_INIT	1
100
101#define NMB_INIT	16
102
103/* ARGSUSED*/
104static void
105mbinit(dummy)
106	void *dummy;
107{
108	int s;
109
110	mmbfree = NULL; mclfree = NULL;
111	mbstat.m_msize = MSIZE;
112	mbstat.m_mclbytes = MCLBYTES;
113	mbstat.m_minclsize = MINCLSIZE;
114	mbstat.m_mlen = MLEN;
115	mbstat.m_mhlen = MHLEN;
116
117	s = splimp();
118	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
119		goto bad;
120#if MCLBYTES <= PAGE_SIZE
121	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
122		goto bad;
123#else
124	/* It's OK to call contigmalloc in this context. */
125	if (m_clalloc(16, M_WAIT) == 0)
126		goto bad;
127#endif
128	splx(s);
129	return;
130bad:
131	panic("mbinit");
132}
133
134/*
135 * Allocate at least nmb mbufs and place on mbuf free list.
136 * Must be called at splimp.
137 */
138/* ARGSUSED */
139int
140m_mballoc(nmb, how)
141	register int nmb;
142	int how;
143{
144	register caddr_t p;
145	register int i;
146	int nbytes;
147
148	/*
149	 * If we've hit the mbuf limit, stop allocating (or trying to
150	 * allocate) from mb_map, in order to avoid dipping into the
151	 * section of mb_map which we've "reserved" for clusters.
152	 */
153	if ((nmb + mbstat.m_mbufs) > nmbufs)
154		return (0);
155
156	/*
157	 * Once we run out of map space, it will be impossible to get
158	 * any more (nothing is ever freed back to the map) -- however,
159	 * this is not fatal, as m_reclaim() might still be able to free
160	 * a substantial amount of space.
161	 *
162	 * XXX Furthermore, we can also work with "recycled" mbufs: when
163	 * called with M_WAIT, a process sleeping in m_mballoc_wait() will
164	 * be woken up when an mbuf is freed.
165	 */
166	if (mb_map_full)
167		return (0);
168
169	nbytes = round_page(nmb * MSIZE);
170	p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
171	if (p == 0 && how == M_WAIT) {
172		mbstat.m_wait++;
173		p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
174	}
175
176	/*
177	 * Either the map is now full, or `how' is M_NOWAIT and there
178	 * are no pages left.
179	 */
180	if (p == NULL)
181		return (0);
182
183	nmb = nbytes / MSIZE;
184	for (i = 0; i < nmb; i++) {
185		((struct mbuf *)p)->m_next = mmbfree;
186		mmbfree = (struct mbuf *)p;
187		p += MSIZE;
188	}
189	mbstat.m_mbufs += nmb;
190	mbtypes[MT_FREE] += nmb;
191	return (1);
192}
193
194/*
195 * Once mb_map has been exhausted, a call to the allocation macros (or, in
196 * some cases, functions) with M_WAIT has to rely solely on reclaimed mbufs.
197 * Here we wait for an mbuf to be freed for a designated (mbuf_wait) amount
198 * of time.
199 */
200struct mbuf *
201m_mballoc_wait(int caller, int type)
202{
203	struct mbuf *p;
204	int s;
205
206	m_mballoc_wid++;
207	if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK)
208		m_mballoc_wid--;
209
210	/*
211	 * Now that we (think) we've got something, we will redo an
212	 * MGET, but avoid getting into another instance of m_mballoc_wait().
213	 * XXX: We retry the fetch _even_ if the sleep timed out.  This is
214	 *      done purposely, for the [unlikely] case that an mbuf was
215	 *      freed but the sleeper was not awakened in time.
216	 */
217	p = NULL;
218	switch (caller) {
219	case MGET_C:
220		MGET(p, M_DONTWAIT, type);
221		break;
222	case MGETHDR_C:
223		MGETHDR(p, M_DONTWAIT, type);
224		break;
225	default:
226		panic("m_mballoc_wait: invalid caller (%d)", caller);
227	}
228
229	s = splimp();
230	if (p != NULL) {		/* We waited and got something... */
231		mbstat.m_wait++;
232		/* Wake up another if we have more free. */
233		if (mmbfree != NULL)
234			MMBWAKEUP();
235	}
236	splx(s);
237	return (p);
238}
239
240#if MCLBYTES > PAGE_SIZE
241static int i_want_my_mcl;
242
243static void
244kproc_mclalloc(void)
245{
246	int status;
247
248	while (1) {
249		tsleep(&i_want_my_mcl, PVM, "mclalloc", 0);
250
251		for (; i_want_my_mcl; i_want_my_mcl--) {
252			if (m_clalloc(1, M_WAIT) == 0)
253				printf("m_clalloc failed even in process context!\n");
254		}
255	}
256}
257
258static struct proc *mclallocproc;
259static struct kproc_desc mclalloc_kp = {
260	"mclalloc",
261	kproc_mclalloc,
262	&mclallocproc
263};
264SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
265	   &mclalloc_kp);
266#endif
267
268/*
269 * Allocate some number of mbuf clusters
270 * and place on cluster free list.
271 * Must be called at splimp.
272 */
273/* ARGSUSED */
274int
275m_clalloc(ncl, how)
276	register int ncl;
277	int how;
278{
279	register caddr_t p;
280	register int i;
281	int npg;
282
283	/*
284	 * If we've hit the mcluster number limit, stop allocating (or
285	 * trying to allocate) from mb_map, in order to avoid dipping into
286	 * the section of mb_map which we've "reserved" for mbufs.
287	 */
288	if ((ncl + mbstat.m_clusters) > nmbclusters) {
289		mbstat.m_drops++;
290		return (0);
291	}
292
293	/*
294	 * Once we run out of map space, it will be impossible
295	 * to get any more (nothing is ever freed back to the
296	 * map). From this point on, we solely rely on freed
297	 * mclusters.
298	 */
299	if (mb_map_full) {
300		mbstat.m_drops++;
301		return (0);
302	}
303
304#if MCLBYTES > PAGE_SIZE
305	if (how != M_WAIT) {
306		i_want_my_mcl += ncl;
307		wakeup(&i_want_my_mcl);
308		mbstat.m_wait++;
309		p = 0;
310	} else {
311		p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul,
312				  ~0ul, PAGE_SIZE, 0, mb_map);
313	}
314#else
315	npg = ncl;
316	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
317				 how != M_WAIT ? M_NOWAIT : M_WAITOK);
318	ncl = ncl * PAGE_SIZE / MCLBYTES;
319#endif
320	/*
321	 * Either the map is now full, or `how' is M_NOWAIT and there
322	 * are no pages left.
323	 */
324	if (p == NULL) {
325		mbstat.m_drops++;
326		return (0);
327	}
328
329	for (i = 0; i < ncl; i++) {
330		((union mcluster *)p)->mcl_next = mclfree;
331		mclfree = (union mcluster *)p;
332		p += MCLBYTES;
333		mbstat.m_clfree++;
334	}
335	mbstat.m_clusters += ncl;
336	return (1);
337}
338
339/*
340 * Once the mb_map submap has been exhausted and the allocation is called with
341 * M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will
342 * sleep for a designated amount of time (mbuf_wait) or until we're woken up
343 * due to sudden mcluster availability.
344 */
345caddr_t
346m_clalloc_wait(void)
347{
348	caddr_t p;
349	int s;
350
351#ifdef __i386__
352	/* If in interrupt context, and INVARIANTS, maintain sanity and die. */
353	KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT"));
354#endif
355
356	/* Sleep until something's available or until we expire. */
357	m_clalloc_wid++;
358	if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK)
359		m_clalloc_wid--;
360
361	/*
362	 * Now that we (think) we've got something, we will redo an MCLALLOC,
363	 * but avoid getting into another instance of m_clalloc_wait().
364	 */
365	p = NULL;
366	MCLALLOC(p, M_DONTWAIT);
367
368	s = splimp();
369	if (p != NULL) {	/* We waited and got something... */
370		mbstat.m_wait++;
371		/* Wake up another if we have more free. */
372		if (mclfree != NULL)
373			MCLWAKEUP();
374	}
375
376	splx(s);
377	return (p);
378}
379
380/*
381 * When MGET fails, ask protocols to free space when short of memory,
382 * then re-attempt to allocate an mbuf.
383 */
384struct mbuf *
385m_retry(i, t)
386	int i, t;
387{
388	register struct mbuf *m;
389
390	/*
391	 * Must only do the reclaim if not in an interrupt context.
392	 */
393	if (i == M_WAIT) {
394#ifdef __i386__
395		KASSERT(intr_nesting_level == 0,
396		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
397#endif
398		m_reclaim();
399	}
400
401	/*
402	 * Both m_mballoc_wait and m_retry must be nulled because
403	 * when the MGET macro is run from here, we definitely do _not_
404	 * want to enter an instance of m_mballoc_wait() or m_retry() (again!)
405	 */
406#define m_mballoc_wait(caller,type)    (struct mbuf *)0
407#define m_retry(i, t)	(struct mbuf *)0
408	MGET(m, i, t);
409#undef m_retry
410#undef m_mballoc_wait
411
412	if (m != NULL)
413		mbstat.m_wait++;
414	else
415		mbstat.m_drops++;
416
417	return (m);
418}
419
420/*
421 * As above; retry an MGETHDR.
422 */
423struct mbuf *
424m_retryhdr(i, t)
425	int i, t;
426{
427	register struct mbuf *m;
428
429	/*
430	 * Must only do the reclaim if not in an interrupt context.
431	 */
432	if (i == M_WAIT) {
433#ifdef __i386__
434		KASSERT(intr_nesting_level == 0,
435		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
436#endif
437		m_reclaim();
438	}
439
440#define m_mballoc_wait(caller,type)    (struct mbuf *)0
441#define m_retryhdr(i, t) (struct mbuf *)0
442	MGETHDR(m, i, t);
443#undef m_retryhdr
444#undef m_mballoc_wait
445
446	if (m != NULL)
447		mbstat.m_wait++;
448	else
449		mbstat.m_drops++;
450
451	return (m);
452}
453
454static void
455m_reclaim()
456{
457	register struct domain *dp;
458	register struct protosw *pr;
459	int s = splimp();
460
461	for (dp = domains; dp; dp = dp->dom_next)
462		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
463			if (pr->pr_drain)
464				(*pr->pr_drain)();
465	splx(s);
466	mbstat.m_drain++;
467}
468
469/*
470 * Space allocation routines.
471 * These are also available as macros
472 * for critical paths.
473 */
474struct mbuf *
475m_get(how, type)
476	int how, type;
477{
478	register struct mbuf *m;
479
480	MGET(m, how, type);
481	return (m);
482}
483
484struct mbuf *
485m_gethdr(how, type)
486	int how, type;
487{
488	register struct mbuf *m;
489
490	MGETHDR(m, how, type);
491	return (m);
492}
493
494struct mbuf *
495m_getclr(how, type)
496	int how, type;
497{
498	register struct mbuf *m;
499
500	MGET(m, how, type);
501	if (m == 0)
502		return (0);
503	bzero(mtod(m, caddr_t), MLEN);
504	return (m);
505}
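
/*
 * An illustrative sketch of typical use of the allocation routines above
 * (usually via the MGETHDR/MGET/MCLGET macros on critical paths): build a
 * packet with a cluster when the payload is large.  "buf" and "len" are
 * hypothetical names, and the sketch assumes len <= MCLBYTES.
 *
 *	struct mbuf *m;
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	if (len > MHLEN) {
 *		MCLGET(m, M_DONTWAIT);
 *		if ((m->m_flags & M_EXT) == 0) {
 *			m_freem(m);
 *			return (ENOBUFS);
 *		}
 *	}
 *	bcopy(buf, mtod(m, caddr_t), len);
 *	m->m_len = m->m_pkthdr.len = len;
 */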
506
507struct mbuf *
508m_free(m)
509	struct mbuf *m;
510{
511	register struct mbuf *n;
512
513	MFREE(m, n);
514	return (n);
515}
516
517void
518m_freem(m)
519	register struct mbuf *m;
520{
521	register struct mbuf *n;
522
523	if (m == NULL)
524		return;
525	do {
526		/*
527		 * We do need to check non-first mbufs as well, since some
528		 * existing code does not call M_PREPEND properly
529		 * (example: call to bpf_mtap from drivers).
530		 */
531		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
532			m_freem(m->m_pkthdr.aux);
533			m->m_pkthdr.aux = NULL;
534		}
535		MFREE(m, n);
536		m = n;
537	} while (m);
538}
539
540/*
541 * Mbuffer utility routines.
542 */
543
544/*
545 * Lesser-used path for M_PREPEND:
546 * allocate new mbuf to prepend to chain,
547 * copy junk along.
548 */
549struct mbuf *
550m_prepend(m, len, how)
551	register struct mbuf *m;
552	int len, how;
553{
554	struct mbuf *mn;
555
556	MGET(mn, how, m->m_type);
557	if (mn == (struct mbuf *)NULL) {
558		m_freem(m);
559		return ((struct mbuf *)NULL);
560	}
561	if (m->m_flags & M_PKTHDR) {
562		M_COPY_PKTHDR(mn, m);
563		m->m_flags &= ~M_PKTHDR;
564	}
565	mn->m_next = m;
566	m = mn;
567	if (len < MHLEN)
568		MH_ALIGN(m, len);
569	m->m_len = len;
570	return (m);
571}
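
/*
 * An illustrative sketch: callers normally use the M_PREPEND() macro, which
 * only drops into m_prepend() when the first mbuf has no leading space.
 * "struct foohdr" and "fh" are hypothetical names.
 *
 *	M_PREPEND(m, sizeof(struct foohdr), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	fh = mtod(m, struct foohdr *);
 *
 * M_PREPEND() may replace the head of the chain and, like m_prepend(),
 * frees the chain on failure, so the old pointer must not be reused.
 */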
572
573/*
574 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
575 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
576 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
577 * Note that the copy is read-only, because clusters are not copied,
578 * only their reference counts are incremented.
579 */
580#define MCFail (mbstat.m_mcfail)
581
582struct mbuf *
583m_copym(m, off0, len, wait)
584	register struct mbuf *m;
585	int off0, wait;
586	register int len;
587{
588	register struct mbuf *n, **np;
589	register int off = off0;
590	struct mbuf *top;
591	int copyhdr = 0;
592
593	KASSERT(off >= 0, ("m_copym, negative off %d", off));
594	KASSERT(len >= 0, ("m_copym, negative len %d", len));
595	if (off == 0 && m->m_flags & M_PKTHDR)
596		copyhdr = 1;
597	while (off > 0) {
598		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
599		if (off < m->m_len)
600			break;
601		off -= m->m_len;
602		m = m->m_next;
603	}
604	np = &top;
605	top = 0;
606	while (len > 0) {
607		if (m == 0) {
608			KASSERT(len == M_COPYALL,
609			    ("m_copym, length > size of mbuf chain"));
610			break;
611		}
612		MGET(n, wait, m->m_type);
613		*np = n;
614		if (n == 0)
615			goto nospace;
616		if (copyhdr) {
617			M_COPY_PKTHDR(n, m);
618			if (len == M_COPYALL)
619				n->m_pkthdr.len -= off0;
620			else
621				n->m_pkthdr.len = len;
622			copyhdr = 0;
623		}
624		n->m_len = min(len, m->m_len - off);
625		if (m->m_flags & M_EXT) {
626			n->m_data = m->m_data + off;
627			if (!m->m_ext.ext_ref)
628				mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
629			else
630				(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
631							m->m_ext.ext_size);
632			n->m_ext = m->m_ext;
633			n->m_flags |= M_EXT;
634		} else
635			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
636			    (unsigned)n->m_len);
637		if (len != M_COPYALL)
638			len -= n->m_len;
639		off = 0;
640		m = m->m_next;
641		np = &n->m_next;
642	}
643	if (top == 0)
644		MCFail++;
645	return (top);
646nospace:
647	m_freem(top);
648	MCFail++;
649	return (0);
650}
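
/*
 * An illustrative sketch of m_copym() use: keep the original chain for a
 * possible retransmission and hand a copy to the output path.
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *
 * Because clusters are shared (only their reference counts are bumped),
 * the copy must be treated as read-only; m_dup() below provides a
 * writable copy instead.
 */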
651
652/*
653 * Copy an entire packet, including header (which must be present).
654 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
655 * Note that the copy is read-only, because clusters are not copied,
656 * only their reference counts are incremented.
657 */
658struct mbuf *
659m_copypacket(m, how)
660	struct mbuf *m;
661	int how;
662{
663	struct mbuf *top, *n, *o;
664
665	MGET(n, how, m->m_type);
666	top = n;
667	if (!n)
668		goto nospace;
669
670	M_COPY_PKTHDR(n, m);
671	n->m_len = m->m_len;
672	if (m->m_flags & M_EXT) {
673		n->m_data = m->m_data;
674		if (!m->m_ext.ext_ref)
675			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
676		else
677			(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
678						m->m_ext.ext_size);
679		n->m_ext = m->m_ext;
680		n->m_flags |= M_EXT;
681	} else {
682		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
683	}
684
685	m = m->m_next;
686	while (m) {
687		MGET(o, how, m->m_type);
688		if (!o)
689			goto nospace;
690
691		n->m_next = o;
692		n = n->m_next;
693
694		n->m_len = m->m_len;
695		if (m->m_flags & M_EXT) {
696			n->m_data = m->m_data;
697			if (!m->m_ext.ext_ref)
698				mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
699			else
700				(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
701							m->m_ext.ext_size);
702			n->m_ext = m->m_ext;
703			n->m_flags |= M_EXT;
704		} else {
705			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
706		}
707
708		m = m->m_next;
709	}
710	return top;
711nospace:
712	m_freem(top);
713	MCFail++;
714	return 0;
715}
716
717/*
718 * Copy data from an mbuf chain starting "off" bytes from the beginning,
719 * continuing for "len" bytes, into the indicated buffer.
720 */
721void
722m_copydata(m, off, len, cp)
723	register struct mbuf *m;
724	register int off;
725	register int len;
726	caddr_t cp;
727{
728	register unsigned count;
729
730	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
731	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
732	while (off > 0) {
733		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
734		if (off < m->m_len)
735			break;
736		off -= m->m_len;
737		m = m->m_next;
738	}
739	while (len > 0) {
740		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
741		count = min(m->m_len - off, len);
742		bcopy(mtod(m, caddr_t) + off, cp, count);
743		len -= count;
744		cp += count;
745		off = 0;
746		m = m->m_next;
747	}
748}
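
/*
 * An illustrative sketch of m_copydata() use: pull a header out of a chain
 * into local storage regardless of how the chain happens to be split up.
 * "struct foohdr" and "off" are hypothetical names.
 *
 *	struct foohdr fh;
 *
 *	if (m->m_pkthdr.len < off + sizeof(fh))
 *		goto drop;
 *	m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
 *
 * The length check matters because m_copydata() requires the full requested
 * range to be present in the chain.
 */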
749
750/*
751 * Copy a packet header mbuf chain into a completely new chain, including
752 * copying any mbuf clusters.  Use this instead of m_copypacket() when
753 * you need a writable copy of an mbuf chain.
754 */
755struct mbuf *
756m_dup(m, how)
757	struct mbuf *m;
758	int how;
759{
760	struct mbuf **p, *top = NULL;
761	int remain, moff, nsize;
762
763	/* Sanity check */
764	if (m == NULL)
765		return (0);
766	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));
767
768	/* While there's more data, get a new mbuf, tack it on, and fill it */
769	remain = m->m_pkthdr.len;
770	moff = 0;
771	p = &top;
772	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
773		struct mbuf *n;
774
775		/* Get the next new mbuf */
776		MGET(n, how, m->m_type);
777		if (n == NULL)
778			goto nospace;
779		if (top == NULL) {		/* first one, must be PKTHDR */
780			M_COPY_PKTHDR(n, m);
781			nsize = MHLEN;
782		} else				/* not the first one */
783			nsize = MLEN;
784		if (remain >= MINCLSIZE) {
785			MCLGET(n, how);
786			if ((n->m_flags & M_EXT) == 0) {
787				(void)m_free(n);
788				goto nospace;
789			}
790			nsize = MCLBYTES;
791		}
792		n->m_len = 0;
793
794		/* Link it into the new chain */
795		*p = n;
796		p = &n->m_next;
797
798		/* Copy data from original mbuf(s) into new mbuf */
799		while (n->m_len < nsize && m != NULL) {
800			int chunk = min(nsize - n->m_len, m->m_len - moff);
801
802			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
803			moff += chunk;
804			n->m_len += chunk;
805			remain -= chunk;
806			if (moff == m->m_len) {
807				m = m->m_next;
808				moff = 0;
809			}
810		}
811
812		/* Check correct total mbuf length */
813		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
814		    	("%s: bogus m_pkthdr.len", __FUNCTION__));
815	}
816	return (top);
817
818nospace:
819	m_freem(top);
820	MCFail++;
821	return (0);
822}
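
/*
 * An illustrative sketch of m_dup() use: obtain a copy that shares nothing
 * with the original, so it can be modified freely (for example before
 * handing it to code that rewrites headers in place).
 *
 *	struct mbuf *n;
 *
 *	n = m_dup(m, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 */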
823
824/*
825 * Concatenate mbuf chain n to m.
826 * Both chains must be of the same type (e.g. MT_DATA).
827 * Any m_pkthdr is not updated.
828 */
829void
830m_cat(m, n)
831	register struct mbuf *m, *n;
832{
833	while (m->m_next)
834		m = m->m_next;
835	while (n) {
836		if (m->m_flags & M_EXT ||
837		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
838			/* just join the two chains */
839			m->m_next = n;
840			return;
841		}
842		/* splat the data from one into the other */
843		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
844		    (u_int)n->m_len);
845		m->m_len += n->m_len;
846		n = m_free(n);
847	}
848}
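
/*
 * An illustrative sketch of m_cat() use: since no packet header is updated,
 * the caller fixes up m_pkthdr.len itself.  "nlen" is a hypothetical count
 * of the data bytes held by the chain "n", saved before the call (m_cat()
 * may free n's mbufs as it compacts data).
 *
 *	m_cat(m, n);
 *	m->m_pkthdr.len += nlen;
 */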
849
850void
851m_adj(mp, req_len)
852	struct mbuf *mp;
853	int req_len;
854{
855	register int len = req_len;
856	register struct mbuf *m;
857	register int count;
858
859	if ((m = mp) == NULL)
860		return;
861	if (len >= 0) {
862		/*
863		 * Trim from head.
864		 */
865		while (m != NULL && len > 0) {
866			if (m->m_len <= len) {
867				len -= m->m_len;
868				m->m_len = 0;
869				m = m->m_next;
870			} else {
871				m->m_len -= len;
872				m->m_data += len;
873				len = 0;
874			}
875		}
876		m = mp;
877		if (mp->m_flags & M_PKTHDR)
878			m->m_pkthdr.len -= (req_len - len);
879	} else {
880		/*
881		 * Trim from tail.  Scan the mbuf chain,
882		 * calculating its length and finding the last mbuf.
883		 * If the adjustment only affects this mbuf, then just
884		 * adjust and return.  Otherwise, rescan and truncate
885		 * after the remaining size.
886		 */
887		len = -len;
888		count = 0;
889		for (;;) {
890			count += m->m_len;
891			if (m->m_next == (struct mbuf *)0)
892				break;
893			m = m->m_next;
894		}
895		if (m->m_len >= len) {
896			m->m_len -= len;
897			if (mp->m_flags & M_PKTHDR)
898				mp->m_pkthdr.len -= len;
899			return;
900		}
901		count -= len;
902		if (count < 0)
903			count = 0;
904		/*
905		 * Correct length for chain is "count".
906		 * Find the mbuf with last data, adjust its length,
907		 * and toss data from remaining mbufs on chain.
908		 */
909		m = mp;
910		if (m->m_flags & M_PKTHDR)
911			m->m_pkthdr.len = count;
912		for (; m; m = m->m_next) {
913			if (m->m_len >= count) {
914				m->m_len = count;
915				break;
916			}
917			count -= m->m_len;
918		}
919		while (m->m_next)
920			(m = m->m_next)->m_len = 0;
921	}
922}
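
/*
 * Illustrative sketches of m_adj() use: a positive count trims from the
 * front of the chain,
 *
 *	m_adj(m, sizeof(struct ether_header));
 *
 * while a negative count trims from the tail, here dropping "padlen"
 * (a hypothetical variable) bytes of trailing padding:
 *
 *	m_adj(m, -padlen);
 */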
923
924/*
925 * Rearrange an mbuf chain so that len bytes are contiguous
926 * and in the data area of an mbuf (so that mtod and dtom
927 * will work for a structure of size len).  Returns the resulting
928 * mbuf chain on success, frees it and returns null on failure.
929 * If there is room, it will add up to max_protohdr-len extra bytes to the
930 * contiguous region in an attempt to avoid being called next time.
931 */
932#define MPFail (mbstat.m_mpfail)
933
934struct mbuf *
935m_pullup(n, len)
936	register struct mbuf *n;
937	int len;
938{
939	register struct mbuf *m;
940	register int count;
941	int space;
942
943	/*
944	 * If first mbuf has no cluster, and has room for len bytes
945	 * without shifting current data, pullup into it,
946	 * otherwise allocate a new mbuf to prepend to the chain.
947	 */
948	if ((n->m_flags & M_EXT) == 0 &&
949	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
950		if (n->m_len >= len)
951			return (n);
952		m = n;
953		n = n->m_next;
954		len -= m->m_len;
955	} else {
956		if (len > MHLEN)
957			goto bad;
958		MGET(m, M_DONTWAIT, n->m_type);
959		if (m == 0)
960			goto bad;
961		m->m_len = 0;
962		if (n->m_flags & M_PKTHDR) {
963			M_COPY_PKTHDR(m, n);
964			n->m_flags &= ~M_PKTHDR;
965		}
966	}
967	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
968	do {
969		count = min(min(max(len, max_protohdr), space), n->m_len);
970		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
971		  (unsigned)count);
972		len -= count;
973		m->m_len += count;
974		n->m_len -= count;
975		space -= count;
976		if (n->m_len)
977			n->m_data += count;
978		else
979			n = m_free(n);
980	} while (len > 0 && n);
981	if (len > 0) {
982		(void) m_free(m);
983		goto bad;
984	}
985	m->m_next = n;
986	return (m);
987bad:
988	m_freem(n);
989	MPFail++;
990	return (0);
991}
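
/*
 * An illustrative sketch of the classic m_pullup() idiom in a protocol
 * input routine that needs a contiguous header before using mtod().
 * "struct foohdr" and "fh" are hypothetical names.
 *
 *	if (m->m_len < sizeof(struct foohdr) &&
 *	    (m = m_pullup(m, sizeof(struct foohdr))) == NULL)
 *		return;
 *	fh = mtod(m, struct foohdr *);
 *
 * On failure m_pullup() has already freed the chain, so the old pointer
 * must not be touched; also note that the requested length may not
 * exceed MHLEN.
 */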
992
993/*
994 * Partition an mbuf chain in two pieces, returning the tail --
995 * all but the first len0 bytes.  In case of failure, it returns NULL and
996 * attempts to restore the chain to its original state.
997 */
998struct mbuf *
999m_split(m0, len0, wait)
1000	register struct mbuf *m0;
1001	int len0, wait;
1002{
1003	register struct mbuf *m, *n;
1004	unsigned len = len0, remain;
1005
1006	for (m = m0; m && len > m->m_len; m = m->m_next)
1007		len -= m->m_len;
1008	if (m == 0)
1009		return (0);
1010	remain = m->m_len - len;
1011	if (m0->m_flags & M_PKTHDR) {
1012		MGETHDR(n, wait, m0->m_type);
1013		if (n == 0)
1014			return (0);
1015		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
1016		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
1017		m0->m_pkthdr.len = len0;
1018		if (m->m_flags & M_EXT)
1019			goto extpacket;
1020		if (remain > MHLEN) {
1021			/* m can't be the lead packet */
1022			MH_ALIGN(n, 0);
1023			n->m_next = m_split(m, len, wait);
1024			if (n->m_next == 0) {
1025				(void) m_free(n);
1026				return (0);
1027			} else
1028				return (n);
1029		} else
1030			MH_ALIGN(n, remain);
1031	} else if (remain == 0) {
1032		n = m->m_next;
1033		m->m_next = 0;
1034		return (n);
1035	} else {
1036		MGET(n, wait, m->m_type);
1037		if (n == 0)
1038			return (0);
1039		M_ALIGN(n, remain);
1040	}
1041extpacket:
1042	if (m->m_flags & M_EXT) {
1043		n->m_flags |= M_EXT;
1044		n->m_ext = m->m_ext;
1045		if (!m->m_ext.ext_ref)
1046			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
1047		else
1048			(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
1049						m->m_ext.ext_size);
1050		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
1051		n->m_data = m->m_data + len;
1052	} else {
1053		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
1054	}
1055	n->m_len = remain;
1056	m->m_len = len;
1057	n->m_next = m->m_next;
1058	m->m_next = 0;
1059	return (n);
1060}
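
/*
 * An illustrative sketch of m_split() use: break the first "reclen" bytes
 * (a hypothetical record length) off the front of a chain, keeping them in
 * "m" and getting the remainder back in "n".
 *
 *	struct mbuf *n;
 *
 *	n = m_split(m, reclen, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *
 * When "m" carries a packet header, the tail gets its own header and
 * m_pkthdr.len is divided between the two halves.
 */
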
1061/*
1062 * Routine to copy from device local memory into mbufs.
1063 */
1064struct mbuf *
1065m_devget(buf, totlen, off0, ifp, copy)
1066	char *buf;
1067	int totlen, off0;
1068	struct ifnet *ifp;
1069	void (*copy) __P((char *from, caddr_t to, u_int len));
1070{
1071	register struct mbuf *m;
1072	struct mbuf *top = 0, **mp = &top;
1073	register int off = off0, len;
1074	register char *cp;
1075	char *epkt;
1076
1077	cp = buf;
1078	epkt = cp + totlen;
1079	if (off) {
1080		cp += off + 2 * sizeof(u_short);
1081		totlen -= 2 * sizeof(u_short);
1082	}
1083	MGETHDR(m, M_DONTWAIT, MT_DATA);
1084	if (m == 0)
1085		return (0);
1086	m->m_pkthdr.rcvif = ifp;
1087	m->m_pkthdr.len = totlen;
1088	m->m_len = MHLEN;
1089
1090	while (totlen > 0) {
1091		if (top) {
1092			MGET(m, M_DONTWAIT, MT_DATA);
1093			if (m == 0) {
1094				m_freem(top);
1095				return (0);
1096			}
1097			m->m_len = MLEN;
1098		}
1099		len = min(totlen, epkt - cp);
1100		if (len >= MINCLSIZE) {
1101			MCLGET(m, M_DONTWAIT);
1102			if (m->m_flags & M_EXT)
1103				m->m_len = len = min(len, MCLBYTES);
1104			else
1105				len = m->m_len;
1106		} else {
1107			/*
1108			 * Place initial small packet/header at end of mbuf.
1109			 */
1110			if (len < m->m_len) {
1111				if (top == 0 && len + max_linkhdr <= m->m_len)
1112					m->m_data += max_linkhdr;
1113				m->m_len = len;
1114			} else
1115				len = m->m_len;
1116		}
1117		if (copy)
1118			copy(cp, mtod(m, caddr_t), (unsigned)len);
1119		else
1120			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
1121		cp += len;
1122		*mp = m;
1123		mp = &m->m_next;
1124		totlen -= len;
1125		if (cp == epkt)
1126			cp = buf;
1127	}
1128	return (top);
1129}
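
/*
 * An illustrative sketch of m_devget() use in a driver receive path, where
 * "buf", "totlen" and "ifp" are hypothetical driver variables:
 *
 *	struct mbuf *m;
 *
 *	m = m_devget(buf, totlen, 0, ifp, NULL);
 *	if (m == NULL)
 *		return;
 *
 * Passing a null copy function makes m_devget() fall back to bcopy();
 * drivers whose board memory needs special access pass their own copy
 * routine instead.
 */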
1130
1131/*
1132 * Copy data from a buffer back into the indicated mbuf chain,
1133 * starting "off" bytes from the beginning, extending the mbuf
1134 * chain if necessary.
1135 */
1136void
1137m_copyback(m0, off, len, cp)
1138	struct	mbuf *m0;
1139	register int off;
1140	register int len;
1141	caddr_t cp;
1142{
1143	register int mlen;
1144	register struct mbuf *m = m0, *n;
1145	int totlen = 0;
1146
1147	if (m0 == 0)
1148		return;
1149	while (off > (mlen = m->m_len)) {
1150		off -= mlen;
1151		totlen += mlen;
1152		if (m->m_next == 0) {
1153			n = m_getclr(M_DONTWAIT, m->m_type);
1154			if (n == 0)
1155				goto out;
1156			n->m_len = min(MLEN, len + off);
1157			m->m_next = n;
1158		}
1159		m = m->m_next;
1160	}
1161	while (len > 0) {
1162		mlen = min(m->m_len - off, len);
1163		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
1164		cp += mlen;
1165		len -= mlen;
1166		mlen += off;
1167		off = 0;
1168		totlen += mlen;
1169		if (len == 0)
1170			break;
1171		if (m->m_next == 0) {
1172			n = m_get(M_DONTWAIT, m->m_type);
1173			if (n == 0)
1174				break;
1175			n->m_len = min(MLEN, len);
1176			m->m_next = n;
1177		}
1178		m = m->m_next;
1179	}
1180out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
1181		m->m_pkthdr.len = totlen;
1182}
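
/*
 * An illustrative sketch of m_copyback() use: patch a recomputed 16-bit
 * checksum, held in "sum", back into the chain at offset "ckoff" (both
 * names are hypothetical).
 *
 *	m_copyback(m, ckoff, sizeof(sum), (caddr_t)&sum);
 *
 * If the chain is shorter than off + len it is extended with freshly
 * allocated mbufs, but that extension can stop silently when no mbufs
 * are available.
 */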
1183
1184void
1185m_print(const struct mbuf *m)
1186{
1187	int len;
1188	const struct mbuf *m2;
1189
1190	len = m->m_pkthdr.len;
1191	m2 = m;
1192	while (len) {
1193		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
1194		len -= m2->m_len;
1195		m2 = m2->m_next;
1196	}
1197	return;
1198}
1199