uipc_mbuf.c revision 54906
1/*
2 * Copyright (c) 1982, 1986, 1988, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34 * $FreeBSD: head/sys/kern/uipc_mbuf.c 54906 1999-12-20 18:10:00Z eivind $
35 */
36
37#include "opt_param.h"
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/kernel.h>
43#include <sys/sysctl.h>
44#include <sys/domain.h>
45#include <sys/protosw.h>
46
47#include <vm/vm.h>
48#include <vm/vm_kern.h>
49#include <vm/vm_extern.h>
50
51#ifdef INVARIANTS
52#include <machine/cpu.h>
53#endif
54
55static void mbinit __P((void *));
56SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
57
58struct mbuf *mbutl;
59char	*mclrefcnt;
60struct mbstat mbstat;
61struct mbuf *mmbfree;
62union mcluster *mclfree;
63int	max_linkhdr;
64int	max_protohdr;
65int	max_hdr;
66int	max_datalen;
67int	nmbclusters;
68int	nmbufs;
69u_int	m_mballoc_wid = 0;
70u_int	m_clalloc_wid = 0;
71
72SYSCTL_DECL(_kern_ipc);
73SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
74	   &max_linkhdr, 0, "");
75SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
76	   &max_protohdr, 0, "");
77SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
78SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
79	   &max_datalen, 0, "");
80SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
81	   &mbuf_wait, 0, "");
82SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, "");
83SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
84	   &nmbclusters, 0, "Maximum number of mbuf clusters avaliable");
85#ifndef NMBCLUSTERS
86#define NMBCLUSTERS	(512 + MAXUSERS * 16)
87#endif
88TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
89TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);	/* XXX fixup? */
90
91static void	m_reclaim __P((void));
92
93/* "number of clusters of pages" */
94#define NCL_INIT	1
95
96#define NMB_INIT	16
97
98/* ARGSUSED*/
99static void
100mbinit(dummy)
101	void *dummy;
102{
103	int s;
104
105	mmbfree = NULL; mclfree = NULL;
106	mbstat.m_msize = MSIZE;
107	mbstat.m_mclbytes = MCLBYTES;
108	mbstat.m_minclsize = MINCLSIZE;
109	mbstat.m_mlen = MLEN;
110	mbstat.m_mhlen = MHLEN;
111
112	s = splimp();
113	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
114		goto bad;
115#if MCLBYTES <= PAGE_SIZE
116	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
117		goto bad;
118#else
119	/* It's OK to call contigmalloc in this context. */
120	if (m_clalloc(16, M_WAIT) == 0)
121		goto bad;
122#endif
123	splx(s);
124	return;
125bad:
126	panic("mbinit");
127}
128
129/*
130 * Allocate at least nmb mbufs and place on mbuf free list.
131 * Must be called at splimp.
132 */
133/* ARGSUSED */
134int
135m_mballoc(nmb, how)
136	register int nmb;
137	int how;
138{
139	register caddr_t p;
140	register int i;
141	int nbytes;
142
143	/*
144	 * Once we run out of map space, it will be impossible to get
145	 * any more (nothing is ever freed back to the map)
146	 * -- however you are not dead as m_reclaim might
147	 * still be able to free a substantial amount of space.
148	 *
149	 * XXX Furthermore, we can also work with "recycled" mbufs (when
150	 * we're calling with M_WAIT the sleep procedure will be woken
151	 * up when an mbuf is freed. See m_mballoc_wait()).
152	 */
153	if (mb_map_full)
154		return (0);
155
156	nbytes = round_page(nmb * MSIZE);
157	p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
158	if (p == 0 && how == M_WAIT) {
159		mbstat.m_wait++;
160		p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
161	}
162
163	/*
164	 * Either the map is now full, or `how' is M_NOWAIT and there
165	 * are no pages left.
166	 */
167	if (p == NULL)
168		return (0);
169
170	nmb = nbytes / MSIZE;
171	for (i = 0; i < nmb; i++) {
172		((struct mbuf *)p)->m_next = mmbfree;
173		mmbfree = (struct mbuf *)p;
174		p += MSIZE;
175	}
176	mbstat.m_mbufs += nmb;
177	return (1);
178}
179
180/*
181 * Once the mb_map has been exhausted and if the call to the allocation macros
182 * (or, in some cases, functions) is with M_WAIT, then it is necessary to rely
183 * solely on reclaimed mbufs. Here we wait for an mbuf to be freed for a
184 * designated (mbuf_wait) time.
185 */
186struct mbuf *
187m_mballoc_wait(int caller, int type)
188{
189	struct mbuf *p;
190	int s;
191
192	m_mballoc_wid++;
193	if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK)
194		m_mballoc_wid--;
195
196	/*
197	 * Now that we (think) that we've got something, we will redo an
198	 * MGET, but avoid getting into another instance of m_mballoc_wait()
199	 * XXX: We retry to fetch _even_ if the sleep timed out. This is left
200	 *      this way, purposely, in the [unlikely] case that an mbuf was
201	 *      freed but the sleep was not awakened in time.
202	 */
203	p = NULL;
204	switch (caller) {
205	case MGET_C:
206		MGET(p, M_DONTWAIT, type);
207		break;
208	case MGETHDR_C:
209		MGETHDR(p, M_DONTWAIT, type);
210		break;
211	default:
212		panic("m_mballoc_wait: invalid caller (%d)", caller);
213	}
214
215	s = splimp();
216	if (p != NULL) {		/* We waited and got something... */
217		mbstat.m_wait++;
218		/* Wake up another if we have more free. */
219		if (mmbfree != NULL)
220			MMBWAKEUP();
221	}
222	splx(s);
223	return (p);
224}
225
#if MCLBYTES > PAGE_SIZE
/*
 * Number of clusters requested by callers of m_clalloc() that could not
 * wait; also used as the sleep/wakeup channel for the helper below.
 */
static int i_want_my_mcl;

/*
 * Kernel process body: sleep until m_clalloc() posts a request, then
 * allocate the outstanding clusters one at a time with M_WAIT, logging
 * any allocation that fails.  Never returns.
 */
static void
kproc_mclalloc(void)
{
	while (1) {
		tsleep(&i_want_my_mcl, PVM, "mclalloc", 0);

		for (; i_want_my_mcl; i_want_my_mcl--) {
			if (m_clalloc(1, M_WAIT) == 0)
				printf("m_clalloc failed even in process context!\n");
		}
	}
}

/* Descriptor and SYSINIT hook that start the helper process at boot. */
static struct proc *mclallocproc;
static struct kproc_desc mclalloc_kp = {
	"mclalloc",
	kproc_mclalloc,
	&mclallocproc
};
SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
	   &mclalloc_kp);
#endif
253
254/*
255 * Allocate some number of mbuf clusters
256 * and place on cluster free list.
257 * Must be called at splimp.
258 */
259/* ARGSUSED */
260int
261m_clalloc(ncl, how)
262	register int ncl;
263	int how;
264{
265	register caddr_t p;
266	register int i;
267	int npg;
268
269	/*
270	 * Once we run out of map space, it will be impossible
271	 * to get any more (nothing is ever freed back to the
272	 * map). From this point on, we solely rely on freed
273	 * mclusters.
274	 */
275	if (mb_map_full) {
276		mbstat.m_drops++;
277		return (0);
278	}
279
280#if MCLBYTES > PAGE_SIZE
281	if (how != M_WAIT) {
282		i_want_my_mcl += ncl;
283		wakeup(&i_want_my_mcl);
284		mbstat.m_wait++;
285		p = 0;
286	} else {
287		p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul,
288				  ~0ul, PAGE_SIZE, 0, mb_map);
289	}
290#else
291	npg = ncl;
292	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
293				 how != M_WAIT ? M_NOWAIT : M_WAITOK);
294	ncl = ncl * PAGE_SIZE / MCLBYTES;
295#endif
296	/*
297	 * Either the map is now full, or `how' is M_NOWAIT and there
298	 * are no pages left.
299	 */
300	if (p == NULL) {
301		mbstat.m_drops++;
302		return (0);
303	}
304
305	for (i = 0; i < ncl; i++) {
306		((union mcluster *)p)->mcl_next = mclfree;
307		mclfree = (union mcluster *)p;
308		p += MCLBYTES;
309		mbstat.m_clfree++;
310	}
311	mbstat.m_clusters += ncl;
312	return (1);
313}
314
315/*
316 * Once the mb_map submap has been exhausted and the allocation is called with
317 * M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will
318 * sleep for a designated amount of time (mbuf_wait) or until we're woken up
319 * due to sudden mcluster availability.
320 */
321caddr_t
322m_clalloc_wait(void)
323{
324	caddr_t p;
325	int s;
326
327#ifdef __i386__
328	/* If in interrupt context, and INVARIANTS, maintain sanity and die. */
329	KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT"));
330#endif
331
332	/* Sleep until something's available or until we expire. */
333	m_clalloc_wid++;
334	if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK)
335		m_clalloc_wid--;
336
337	/*
338	 * Now that we (think) that we've got something, we will redo and
339	 * MGET, but avoid getting into another instance of m_clalloc_wait()
340	 */
341	p = NULL;
342	MCLALLOC(p, M_DONTWAIT);
343
344	s = splimp();
345	if (p != NULL) {	/* We waited and got something... */
346		mbstat.m_wait++;
347		/* Wake up another if we have more free. */
348		if (mclfree != NULL)
349			MCLWAKEUP();
350	}
351
352	splx(s);
353	return (p);
354}
355
356/*
357 * When MGET fails, ask protocols to free space when short of memory,
358 * then re-attempt to allocate an mbuf.
359 */
360struct mbuf *
361m_retry(i, t)
362	int i, t;
363{
364	register struct mbuf *m;
365
366	/*
367	 * Must only do the reclaim if not in an interrupt context.
368	 */
369	if (i == M_WAIT) {
370#ifdef __i386__
371		KASSERT(intr_nesting_level == 0,
372		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
373#endif
374		m_reclaim();
375	}
376
377	/*
378	 * Both m_mballoc_wait and m_retry must be nulled because
379	 * when the MGET macro is run from here, we deffinately do _not_
380	 * want to enter an instance of m_mballoc_wait() or m_retry() (again!)
381	 */
382#define m_mballoc_wait(caller,type)    (struct mbuf *)0
383#define m_retry(i, t)	(struct mbuf *)0
384	MGET(m, i, t);
385#undef m_retry
386#undef m_mballoc_wait
387
388	if (m != NULL)
389		mbstat.m_wait++;
390	else
391		mbstat.m_drops++;
392
393	return (m);
394}
395
396/*
397 * As above; retry an MGETHDR.
398 */
399struct mbuf *
400m_retryhdr(i, t)
401	int i, t;
402{
403	register struct mbuf *m;
404
405	/*
406	 * Must only do the reclaim if not in an interrupt context.
407	 */
408	if (i == M_WAIT) {
409#ifdef __i386__
410		KASSERT(intr_nesting_level == 0,
411		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
412#endif
413		m_reclaim();
414	}
415
416#define m_mballoc_wait(caller,type)    (struct mbuf *)0
417#define m_retryhdr(i, t) (struct mbuf *)0
418	MGETHDR(m, i, t);
419#undef m_retryhdr
420#undef m_mballoc_wait
421
422	if (m != NULL)
423		mbstat.m_wait++;
424	else
425		mbstat.m_drops++;
426
427	return (m);
428}
429
430static void
431m_reclaim()
432{
433	register struct domain *dp;
434	register struct protosw *pr;
435	int s = splimp();
436
437	for (dp = domains; dp; dp = dp->dom_next)
438		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
439			if (pr->pr_drain)
440				(*pr->pr_drain)();
441	splx(s);
442	mbstat.m_drain++;
443}
444
/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */

/*
 * Function form of MGET: returns an mbuf of the given type, or
 * whatever MGET yields on failure.
 */
struct mbuf *
m_get(how, type)
	int how, type;
{
	register struct mbuf *mb;

	MGET(mb, how, type);
	return (mb);
}
459
/*
 * Function form of MGETHDR: returns a packet-header mbuf of the given
 * type, or whatever MGETHDR yields on failure.
 */
struct mbuf *
m_gethdr(how, type)
	int how, type;
{
	register struct mbuf *mb;

	MGETHDR(mb, how, type);
	return (mb);
}
469
470struct mbuf *
471m_getclr(how, type)
472	int how, type;
473{
474	register struct mbuf *m;
475
476	MGET(m, how, type);
477	if (m == 0)
478		return (0);
479	bzero(mtod(m, caddr_t), MLEN);
480	return (m);
481}
482
/*
 * Function form of MFREE: release a single mbuf and return the
 * successor that MFREE reports for it.
 */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *successor;

	MFREE(m, successor);
	return (successor);
}
492
493void
494m_freem(m)
495	register struct mbuf *m;
496{
497	register struct mbuf *n;
498
499	if (m == NULL)
500		return;
501	do {
502		MFREE(m, n);
503		m = n;
504	} while (m);
505}
506
507/*
508 * Mbuffer utility routines.
509 */
510
511/*
512 * Lesser-used path for M_PREPEND:
513 * allocate new mbuf to prepend to chain,
514 * copy junk along.
515 */
516struct mbuf *
517m_prepend(m, len, how)
518	register struct mbuf *m;
519	int len, how;
520{
521	struct mbuf *mn;
522
523	MGET(mn, how, m->m_type);
524	if (mn == (struct mbuf *)NULL) {
525		m_freem(m);
526		return ((struct mbuf *)NULL);
527	}
528	if (m->m_flags & M_PKTHDR) {
529		M_COPY_PKTHDR(mn, m);
530		m->m_flags &= ~M_PKTHDR;
531	}
532	mn->m_next = m;
533	m = mn;
534	if (len < MHLEN)
535		MH_ALIGN(m, len);
536	m->m_len = len;
537	return (m);
538}
539
540/*
541 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
542 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
543 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
544 * Note that the copy is read-only, because clusters are not copied,
545 * only their reference counts are incremented.
546 */
547#define MCFail (mbstat.m_mcfail)
548
549struct mbuf *
550m_copym(m, off0, len, wait)
551	register struct mbuf *m;
552	int off0, wait;
553	register int len;
554{
555	register struct mbuf *n, **np;
556	register int off = off0;
557	struct mbuf *top;
558	int copyhdr = 0;
559
560	KASSERT(off >= 0, ("m_copym, negative off %d", off));
561	KASSERT(len >= 0, ("m_copym, negative len %d", len));
562	if (off == 0 && m->m_flags & M_PKTHDR)
563		copyhdr = 1;
564	while (off > 0) {
565		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
566		if (off < m->m_len)
567			break;
568		off -= m->m_len;
569		m = m->m_next;
570	}
571	np = &top;
572	top = 0;
573	while (len > 0) {
574		if (m == 0) {
575			KASSERT(len == M_COPYALL,
576			    ("m_copym, length > size of mbuf chain"));
577			break;
578		}
579		MGET(n, wait, m->m_type);
580		*np = n;
581		if (n == 0)
582			goto nospace;
583		if (copyhdr) {
584			M_COPY_PKTHDR(n, m);
585			if (len == M_COPYALL)
586				n->m_pkthdr.len -= off0;
587			else
588				n->m_pkthdr.len = len;
589			copyhdr = 0;
590		}
591		n->m_len = min(len, m->m_len - off);
592		if (m->m_flags & M_EXT) {
593			n->m_data = m->m_data + off;
594			if(!m->m_ext.ext_ref)
595				mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
596			else
597				(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
598							m->m_ext.ext_size);
599			n->m_ext = m->m_ext;
600			n->m_flags |= M_EXT;
601		} else
602			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
603			    (unsigned)n->m_len);
604		if (len != M_COPYALL)
605			len -= n->m_len;
606		off = 0;
607		m = m->m_next;
608		np = &n->m_next;
609	}
610	if (top == 0)
611		MCFail++;
612	return (top);
613nospace:
614	m_freem(top);
615	MCFail++;
616	return (0);
617}
618
619/*
620 * Copy an entire packet, including header (which must be present).
621 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
622 * Note that the copy is read-only, because clusters are not copied,
623 * only their reference counts are incremented.
624 */
625struct mbuf *
626m_copypacket(m, how)
627	struct mbuf *m;
628	int how;
629{
630	struct mbuf *top, *n, *o;
631
632	MGET(n, how, m->m_type);
633	top = n;
634	if (!n)
635		goto nospace;
636
637	M_COPY_PKTHDR(n, m);
638	n->m_len = m->m_len;
639	if (m->m_flags & M_EXT) {
640		n->m_data = m->m_data;
641		if(!m->m_ext.ext_ref)
642			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
643		else
644			(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
645						m->m_ext.ext_size);
646		n->m_ext = m->m_ext;
647		n->m_flags |= M_EXT;
648	} else {
649		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
650	}
651
652	m = m->m_next;
653	while (m) {
654		MGET(o, how, m->m_type);
655		if (!o)
656			goto nospace;
657
658		n->m_next = o;
659		n = n->m_next;
660
661		n->m_len = m->m_len;
662		if (m->m_flags & M_EXT) {
663			n->m_data = m->m_data;
664			if(!m->m_ext.ext_ref)
665				mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
666			else
667				(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
668							m->m_ext.ext_size);
669			n->m_ext = m->m_ext;
670			n->m_flags |= M_EXT;
671		} else {
672			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
673		}
674
675		m = m->m_next;
676	}
677	return top;
678nospace:
679	m_freem(top);
680	MCFail++;
681	return 0;
682}
683
684/*
685 * Copy data from an mbuf chain starting "off" bytes from the beginning,
686 * continuing for "len" bytes, into the indicated buffer.
687 */
688void
689m_copydata(m, off, len, cp)
690	register struct mbuf *m;
691	register int off;
692	register int len;
693	caddr_t cp;
694{
695	register unsigned count;
696
697	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
698	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
699	while (off > 0) {
700		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
701		if (off < m->m_len)
702			break;
703		off -= m->m_len;
704		m = m->m_next;
705	}
706	while (len > 0) {
707		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
708		count = min(m->m_len - off, len);
709		bcopy(mtod(m, caddr_t) + off, cp, count);
710		len -= count;
711		cp += count;
712		off = 0;
713		m = m->m_next;
714	}
715}
716
717/*
718 * Copy a packet header mbuf chain into a completely new chain, including
719 * copying any mbuf clusters.  Use this instead of m_copypacket() when
720 * you need a writable copy of an mbuf chain.
721 */
722struct mbuf *
723m_dup(m, how)
724	struct mbuf *m;
725	int how;
726{
727	struct mbuf **p, *top = NULL;
728	int remain, moff, nsize;
729
730	/* Sanity check */
731	if (m == NULL)
732		return (0);
733	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));
734
735	/* While there's more data, get a new mbuf, tack it on, and fill it */
736	remain = m->m_pkthdr.len;
737	moff = 0;
738	p = &top;
739	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
740		struct mbuf *n;
741
742		/* Get the next new mbuf */
743		MGET(n, how, m->m_type);
744		if (n == NULL)
745			goto nospace;
746		if (top == NULL) {		/* first one, must be PKTHDR */
747			M_COPY_PKTHDR(n, m);
748			nsize = MHLEN;
749		} else				/* not the first one */
750			nsize = MLEN;
751		if (remain >= MINCLSIZE) {
752			MCLGET(n, how);
753			if ((n->m_flags & M_EXT) == 0) {
754				(void)m_free(n);
755				goto nospace;
756			}
757			nsize = MCLBYTES;
758		}
759		n->m_len = 0;
760
761		/* Link it into the new chain */
762		*p = n;
763		p = &n->m_next;
764
765		/* Copy data from original mbuf(s) into new mbuf */
766		while (n->m_len < nsize && m != NULL) {
767			int chunk = min(nsize - n->m_len, m->m_len - moff);
768
769			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
770			moff += chunk;
771			n->m_len += chunk;
772			remain -= chunk;
773			if (moff == m->m_len) {
774				m = m->m_next;
775				moff = 0;
776			}
777		}
778
779		/* Check correct total mbuf length */
780		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
781		    	("%s: bogus m_pkthdr.len", __FUNCTION__));
782	}
783	return (top);
784
785nospace:
786	m_freem(top);
787	MCFail++;
788	return (0);
789}
790
791/*
792 * Concatenate mbuf chain n to m.
793 * Both chains must be of the same type (e.g. MT_DATA).
794 * Any m_pkthdr is not updated.
795 */
796void
797m_cat(m, n)
798	register struct mbuf *m, *n;
799{
800	while (m->m_next)
801		m = m->m_next;
802	while (n) {
803		if (m->m_flags & M_EXT ||
804		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
805			/* just join the two chains */
806			m->m_next = n;
807			return;
808		}
809		/* splat the data from one into the other */
810		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
811		    (u_int)n->m_len);
812		m->m_len += n->m_len;
813		n = m_free(n);
814	}
815}
816
817void
818m_adj(mp, req_len)
819	struct mbuf *mp;
820	int req_len;
821{
822	register int len = req_len;
823	register struct mbuf *m;
824	register int count;
825
826	if ((m = mp) == NULL)
827		return;
828	if (len >= 0) {
829		/*
830		 * Trim from head.
831		 */
832		while (m != NULL && len > 0) {
833			if (m->m_len <= len) {
834				len -= m->m_len;
835				m->m_len = 0;
836				m = m->m_next;
837			} else {
838				m->m_len -= len;
839				m->m_data += len;
840				len = 0;
841			}
842		}
843		m = mp;
844		if (mp->m_flags & M_PKTHDR)
845			m->m_pkthdr.len -= (req_len - len);
846	} else {
847		/*
848		 * Trim from tail.  Scan the mbuf chain,
849		 * calculating its length and finding the last mbuf.
850		 * If the adjustment only affects this mbuf, then just
851		 * adjust and return.  Otherwise, rescan and truncate
852		 * after the remaining size.
853		 */
854		len = -len;
855		count = 0;
856		for (;;) {
857			count += m->m_len;
858			if (m->m_next == (struct mbuf *)0)
859				break;
860			m = m->m_next;
861		}
862		if (m->m_len >= len) {
863			m->m_len -= len;
864			if (mp->m_flags & M_PKTHDR)
865				mp->m_pkthdr.len -= len;
866			return;
867		}
868		count -= len;
869		if (count < 0)
870			count = 0;
871		/*
872		 * Correct length for chain is "count".
873		 * Find the mbuf with last data, adjust its length,
874		 * and toss data from remaining mbufs on chain.
875		 */
876		m = mp;
877		if (m->m_flags & M_PKTHDR)
878			m->m_pkthdr.len = count;
879		for (; m; m = m->m_next) {
880			if (m->m_len >= count) {
881				m->m_len = count;
882				break;
883			}
884			count -= m->m_len;
885		}
886		while (m->m_next)
887			(m = m->m_next) ->m_len = 0;
888	}
889}
890
891/*
892 * Rearange an mbuf chain so that len bytes are contiguous
893 * and in the data area of an mbuf (so that mtod and dtom
894 * will work for a structure of size len).  Returns the resulting
895 * mbuf chain on success, frees it and returns null on failure.
896 * If there is room, it will add up to max_protohdr-len extra bytes to the
897 * contiguous region in an attempt to avoid being called next time.
898 */
899#define MPFail (mbstat.m_mpfail)
900
901struct mbuf *
902m_pullup(n, len)
903	register struct mbuf *n;
904	int len;
905{
906	register struct mbuf *m;
907	register int count;
908	int space;
909
910	/*
911	 * If first mbuf has no cluster, and has room for len bytes
912	 * without shifting current data, pullup into it,
913	 * otherwise allocate a new mbuf to prepend to the chain.
914	 */
915	if ((n->m_flags & M_EXT) == 0 &&
916	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
917		if (n->m_len >= len)
918			return (n);
919		m = n;
920		n = n->m_next;
921		len -= m->m_len;
922	} else {
923		if (len > MHLEN)
924			goto bad;
925		MGET(m, M_DONTWAIT, n->m_type);
926		if (m == 0)
927			goto bad;
928		m->m_len = 0;
929		if (n->m_flags & M_PKTHDR) {
930			M_COPY_PKTHDR(m, n);
931			n->m_flags &= ~M_PKTHDR;
932		}
933	}
934	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
935	do {
936		count = min(min(max(len, max_protohdr), space), n->m_len);
937		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
938		  (unsigned)count);
939		len -= count;
940		m->m_len += count;
941		n->m_len -= count;
942		space -= count;
943		if (n->m_len)
944			n->m_data += count;
945		else
946			n = m_free(n);
947	} while (len > 0 && n);
948	if (len > 0) {
949		(void) m_free(m);
950		goto bad;
951	}
952	m->m_next = n;
953	return (m);
954bad:
955	m_freem(n);
956	MPFail++;
957	return (0);
958}
959
960/*
961 * Partition an mbuf chain in two pieces, returning the tail --
962 * all but the first len0 bytes.  In case of failure, it returns NULL and
963 * attempts to restore the chain to its original state.
964 */
965struct mbuf *
966m_split(m0, len0, wait)
967	register struct mbuf *m0;
968	int len0, wait;
969{
970	register struct mbuf *m, *n;
971	unsigned len = len0, remain;
972
973	for (m = m0; m && len > m->m_len; m = m->m_next)
974		len -= m->m_len;
975	if (m == 0)
976		return (0);
977	remain = m->m_len - len;
978	if (m0->m_flags & M_PKTHDR) {
979		MGETHDR(n, wait, m0->m_type);
980		if (n == 0)
981			return (0);
982		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
983		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
984		m0->m_pkthdr.len = len0;
985		if (m->m_flags & M_EXT)
986			goto extpacket;
987		if (remain > MHLEN) {
988			/* m can't be the lead packet */
989			MH_ALIGN(n, 0);
990			n->m_next = m_split(m, len, wait);
991			if (n->m_next == 0) {
992				(void) m_free(n);
993				return (0);
994			} else
995				return (n);
996		} else
997			MH_ALIGN(n, remain);
998	} else if (remain == 0) {
999		n = m->m_next;
1000		m->m_next = 0;
1001		return (n);
1002	} else {
1003		MGET(n, wait, m->m_type);
1004		if (n == 0)
1005			return (0);
1006		M_ALIGN(n, remain);
1007	}
1008extpacket:
1009	if (m->m_flags & M_EXT) {
1010		n->m_flags |= M_EXT;
1011		n->m_ext = m->m_ext;
1012		if(!m->m_ext.ext_ref)
1013			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
1014		else
1015			(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
1016						m->m_ext.ext_size);
1017		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
1018		n->m_data = m->m_data + len;
1019	} else {
1020		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
1021	}
1022	n->m_len = remain;
1023	m->m_len = len;
1024	n->m_next = m->m_next;
1025	m->m_next = 0;
1026	return (n);
1027}
1028/*
1029 * Routine to copy from device local memory into mbufs.
1030 */
1031struct mbuf *
1032m_devget(buf, totlen, off0, ifp, copy)
1033	char *buf;
1034	int totlen, off0;
1035	struct ifnet *ifp;
1036	void (*copy) __P((char *from, caddr_t to, u_int len));
1037{
1038	register struct mbuf *m;
1039	struct mbuf *top = 0, **mp = &top;
1040	register int off = off0, len;
1041	register char *cp;
1042	char *epkt;
1043
1044	cp = buf;
1045	epkt = cp + totlen;
1046	if (off) {
1047		cp += off + 2 * sizeof(u_short);
1048		totlen -= 2 * sizeof(u_short);
1049	}
1050	MGETHDR(m, M_DONTWAIT, MT_DATA);
1051	if (m == 0)
1052		return (0);
1053	m->m_pkthdr.rcvif = ifp;
1054	m->m_pkthdr.len = totlen;
1055	m->m_len = MHLEN;
1056
1057	while (totlen > 0) {
1058		if (top) {
1059			MGET(m, M_DONTWAIT, MT_DATA);
1060			if (m == 0) {
1061				m_freem(top);
1062				return (0);
1063			}
1064			m->m_len = MLEN;
1065		}
1066		len = min(totlen, epkt - cp);
1067		if (len >= MINCLSIZE) {
1068			MCLGET(m, M_DONTWAIT);
1069			if (m->m_flags & M_EXT)
1070				m->m_len = len = min(len, MCLBYTES);
1071			else
1072				len = m->m_len;
1073		} else {
1074			/*
1075			 * Place initial small packet/header at end of mbuf.
1076			 */
1077			if (len < m->m_len) {
1078				if (top == 0 && len + max_linkhdr <= m->m_len)
1079					m->m_data += max_linkhdr;
1080				m->m_len = len;
1081			} else
1082				len = m->m_len;
1083		}
1084		if (copy)
1085			copy(cp, mtod(m, caddr_t), (unsigned)len);
1086		else
1087			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
1088		cp += len;
1089		*mp = m;
1090		mp = &m->m_next;
1091		totlen -= len;
1092		if (cp == epkt)
1093			cp = buf;
1094	}
1095	return (top);
1096}
1097
1098/*
1099 * Copy data from a buffer back into the indicated mbuf chain,
1100 * starting "off" bytes from the beginning, extending the mbuf
1101 * chain if necessary.
1102 */
1103void
1104m_copyback(m0, off, len, cp)
1105	struct	mbuf *m0;
1106	register int off;
1107	register int len;
1108	caddr_t cp;
1109{
1110	register int mlen;
1111	register struct mbuf *m = m0, *n;
1112	int totlen = 0;
1113
1114	if (m0 == 0)
1115		return;
1116	while (off > (mlen = m->m_len)) {
1117		off -= mlen;
1118		totlen += mlen;
1119		if (m->m_next == 0) {
1120			n = m_getclr(M_DONTWAIT, m->m_type);
1121			if (n == 0)
1122				goto out;
1123			n->m_len = min(MLEN, len + off);
1124			m->m_next = n;
1125		}
1126		m = m->m_next;
1127	}
1128	while (len > 0) {
1129		mlen = min (m->m_len - off, len);
1130		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
1131		cp += mlen;
1132		len -= mlen;
1133		mlen += off;
1134		off = 0;
1135		totlen += mlen;
1136		if (len == 0)
1137			break;
1138		if (m->m_next == 0) {
1139			n = m_get(M_DONTWAIT, m->m_type);
1140			if (n == 0)
1141				break;
1142			n->m_len = min(MLEN, len);
1143			m->m_next = n;
1144		}
1145		m = m->m_next;
1146	}
1147out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
1148		m->m_pkthdr.len = totlen;
1149}
1150
1151void
1152m_print(const struct mbuf *m)
1153{
1154	int len;
1155	const struct mbuf *m2;
1156
1157	len = m->m_pkthdr.len;
1158	m2 = m;
1159	while (len) {
1160		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
1161		len -= m2->m_len;
1162		m2 = m2->m_next;
1163	}
1164	return;
1165}
1166