/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/uipc_mbuf.c 71089 2001-01-16 01:53:13Z bmilekic $
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	nmbcnt;
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;

/*
 * freelist header structures...
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
	   &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
	   sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
	   "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
	   "Maximum number of ext_buf counters available");
#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif
TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);
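/*
 * Usage note (illustrative, not part of the original source): the
 * TUNABLE_INT_DECL() knobs above are read from the kernel environment
 * at boot, so they are typically set from the loader, e.g. in
 * /boot/loader.conf (values here are arbitrary examples):
 *
 *	kern.ipc.nmbclusters="8192"
 *	kern.ipc.nmbufs="32768"
 */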

static void	m_reclaim __P((void));

/* Initial allocation numbers */
#define NCL_INIT	2
#define NMB_INIT	16
#define REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization done here.
 *
 * XXX: If ever we have system specific map setups to do, then move them to
 *      machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(dummy)
	void *dummy;
{
	vm_offset_t maxaddr, mb_map_size;

	/*
	 * Set up mb_map and allocate the requested VM space.
	 */
	mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt
	    * sizeof(union mext_refcnt);
	mb_map_size = roundup2(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers and set up locks for the lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mmbfree.m_mtx, "mbuf free list lock", MTX_DEF);
	mtx_init(&mclfree.m_mtx, "mcluster free list lock", MTX_DEF);
	mtx_init(&mcntfree.m_mtx, "m_ext counter free list lock", MTX_DEF);

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_enter(&mcntfree.m_mtx, MTX_DEF);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mcntfree.m_mtx, MTX_DEF);

	mtx_enter(&mmbfree.m_mtx, MTX_DEF);
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mmbfree.m_mtx, MTX_DEF);

	mtx_enter(&mclfree.m_mtx, MTX_DEF);
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mclfree.m_mtx, MTX_DEF);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(nmb, how)
	u_int nmb;
	int how;
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't need wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	mtx_exit(&mcntfree.m_mtx, MTX_DEF);
#ifdef WITNESS
	/*
	 * XXX: Make sure we don't create lock order problems.
	 * XXX: We'll grab Giant, but for that to be OK, make sure
	 * XXX: that either Giant is already held OR make sure that
	 * XXX: no other locks are held coming in.
	 * XXX: Revisit once most of the net stuff gets locks added.
	 */
	KASSERT(mtx_owned(&Giant) || witness_list(CURPROC) == 0,
	    ("m_alloc_ref: Giant must be owned or no locks held"));
#endif
	mtx_enter(&Giant, MTX_DEF);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		mtx_exit(&Giant, MTX_DEF);
		mtx_enter(&mcntfree.m_mtx, MTX_DEF);
		return (0);
	}
	mtx_exit(&Giant, MTX_DEF);
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	mtx_enter(&mcntfree.m_mtx, MTX_DEF);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(nmb, how)
	register int nmb;
	int how;
{
	register caddr_t p;
	register int i;
	int nbytes;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) {
		/*
		 * Needs to be atomic as we may be incrementing it
		 * while holding another mutex, like mclfree. In other
		 * words, m_drops is not reserved solely for mbufs,
		 * but is also available for clusters.
		 */
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	nbytes = round_page(nmb * MSIZE);

	mtx_exit(&mmbfree.m_mtx, MTX_DEF);
#ifdef WITNESS
	/*
	 * XXX: Make sure we don't create lock order problems.
	 * XXX: We'll grab Giant, but for that to be OK, make sure
	 * XXX: that either Giant is already held OR make sure that
	 * XXX: no other locks are held coming in.
	 * XXX: Revisit once most of the net stuff gets locks added.
	 */
	KASSERT(mtx_owned(&Giant) || witness_list(CURPROC) == 0,
	    ("m_mballoc: Giant must be owned or no locks held"));
#endif
	mtx_enter(&Giant, MTX_DEF);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
	if (p == 0 && how == M_TRYWAIT) {
		atomic_add_long(&mbstat.m_wait, 1);
		p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
	}
	mtx_exit(&Giant, MTX_DEF);
	mtx_enter(&mmbfree.m_mtx, MTX_DEF);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	nmb = nbytes / MSIZE;

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}

/*
 * Once mb_map has been exhausted, if the call to the allocation macros
 * (or, in some cases, functions) was made with M_TRYWAIT, we must rely
 * solely on reclaimed mbufs.
 *
 * Here we ask the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time.
 *
 * Must be called with the mmbfree mutex held.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines. Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (See comment above m_reclaim()).
	 */
	mtx_exit(&mmbfree.m_mtx, MTX_DEF);
	m_reclaim();

	mtx_enter(&mmbfree.m_mtx, MTX_DEF);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		m_mballoc_wid++;
		if (msleep(&m_mballoc_wid, &mmbfree.m_mtx, PVM, "mballc",
		    mbuf_wait) == EWOULDBLOCK)
			m_mballoc_wid--;

		/*
		 * Try again (one last time).
		 *
		 * We retry the fetch _even_ if the sleep timed out. This
		 * is left this way, purposely, in the [unlikely] case
		 * that an mbuf was freed but the sleeper was not awoken
		 * in time.
		 *
		 * If the sleep didn't time out (i.e. we got woken up) then
		 * we have the lock so we just grab an mbuf, hopefully.
		 */
		_MGET(p, M_DONTWAIT);
	}

	/* If we waited and got something... */
	if (p != NULL) {
		atomic_add_long(&mbstat.m_wait, 1);
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid);
	} else
		atomic_add_long(&mbstat.m_drops, 1);

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(ncl, how)
	register int ncl;
	int how;
{
	register caddr_t p;
	register int i;
	int npg;

	/*
	 * If the map is now full (nothing will ever be freed back to it)
	 * or if we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) {
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	npg = ncl;
	mtx_exit(&mclfree.m_mtx, MTX_DEF);
#ifdef WITNESS
	/*
	 * XXX: Make sure we don't create lock order problems.
	 * XXX: We'll grab Giant, but for that to be OK, make sure
	 * XXX: that either Giant is already held OR make sure that
	 * XXX: no other locks are held coming in.
	 * XXX: Revisit once most of the net stuff gets locks added.
	 */
	KASSERT(mtx_owned(&Giant) || witness_list(CURPROC) == 0,
	    ("m_clalloc: Giant must be owned or no locks held"));
#endif
	mtx_enter(&Giant, MTX_DEF);
	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
				 how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	mtx_exit(&Giant, MTX_DEF);
	ncl = ncl * PAGE_SIZE / MCLBYTES;
	mtx_enter(&mclfree.m_mtx, MTX_DEF);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL) {
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 */
	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * sleep for a designated amount of time (mbuf_wait) or until we're woken up
 * due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;

	m_clalloc_wid++;
	if (msleep(&m_clalloc_wid, &mclfree.m_mtx, PVM, "mclalc", mbuf_wait)
	    == EWOULDBLOCK)
		m_clalloc_wid--;

	/*
	 * Now that we (think) we've got something, try again.
	 */
	_MCLALLOC(p, M_DONTWAIT);

	/* If we waited and got something ... */
	if (p != NULL) {
		atomic_add_long(&mbstat.m_wait, 1);
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid);
	} else
		atomic_add_long(&mbstat.m_drops, 1);

	return (p);
}

/*
 * m_reclaim: drain the protocols in the hope of freeing up some resources...
 *
 * XXX: No locks should be held going in here. The drain routines have
 * to presently acquire some locks, which raises the possibility of a lock
 * order violation if we're holding any mutex that is acquired in reverse
 * order relative to one of the locks in the drain routines.
 */
static void
m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(CURPROC) == 0,
	    ("m_reclaim called with locks held"));
#endif

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
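/*
 * Usage sketch (illustrative only): the function forms below mirror the
 * MGET()/MGETHDR()/MFREE() macros and are convenient outside of the
 * performance-critical paths:
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	...
 *	m_freem(m);
 */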
struct mbuf *
m_get(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

struct mbuf *
m_gethdr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	if (m == 0)
		return (0);
	bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	register struct mbuf *m;
{
	register struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We do need to check non-first mbufs, since some existing
		 * code does not call M_PREPEND properly
		 * (example: call to bpf_mtap from drivers).
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Mbuffer utility routines.
 */
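/*
 * Usage sketch (illustrative only): most callers go through the
 * M_PREPEND() macro, which falls back on m_prepend() below only when
 * there is no leading space left in the first mbuf.  The sizes here
 * are hypothetical:
 *
 *	M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */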

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	register struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
#define MCFail (mbstat.m_mcfail)
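/*
 * Usage sketch (illustrative only): a read-only copy of a whole chain,
 * e.g. for retransmission, shares cluster data instead of copying it:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		...	(allocation failed; the original chain is intact)
 */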

struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		atomic_add_long(&MCFail, 1);
	return (top);
nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return (0);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copypacket(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return 0;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	register struct mbuf *m;
	register int off;
	register int len;
	caddr_t cp;
{
	register unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (0);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    	("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return (0);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	register struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

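/*
 * Trim req_len bytes of data from the chain: from the head if req_len
 * is positive, from the tail if it is negative.  The m_pkthdr.len of a
 * packet header mbuf is updated to match.
 */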
void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	register int len = req_len;
	register struct mbuf *m;
	register int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
#define MPFail (mbstat.m_mpfail)
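/*
 * Usage sketch (illustrative only): the classic idiom in protocol input
 * routines, guaranteeing that a header may be accessed via mtod():
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == 0)
 *		...	(m_pullup freed the chain; drop the packet)
 */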

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	atomic_add_long(&MPFail, 1);
	return (0);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
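/*
 * Usage sketch (illustrative only, hdrlen is hypothetical): splitting
 * a packet in two, e.g. at a header boundary:
 *
 *	struct mbuf *tail;
 *
 *	tail = m_split(m, hdrlen, M_DONTWAIT);
 *	if (tail == NULL)
 *		...	(failure; the chain m is restored/unchanged)
 */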
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
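/*
 * Usage sketch (illustrative only, with hypothetical driver names): a
 * network driver copying a received frame of pktlen bytes out of board
 * memory, letting m_devget use bcopy by passing a NULL copy function:
 *
 *	m = m_devget(sc->rx_buf, pktlen, 0, &sc->arpcom.ac_if, NULL);
 *	if (m == NULL)
 *		...	(out of mbufs; drop the frame)
 */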
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((char *from, caddr_t to, u_int len));
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct	mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

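/*
 * Debugging aid: print each mbuf in a chain along with a dump of its
 * data.  Note that the total length is taken from m_pkthdr.len, so this
 * is only meaningful for chains whose first mbuf has a packet header.
 */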
void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}