/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/uipc_mbuf.c 70254 2000-12-21 21:44:31Z bmilekic $
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	nmbcnt;
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;

/*
 * Free list header structures:
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
	   &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
	   sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
	   "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
	   "Maximum number of ext_buf counters available");
#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif
TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);
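
/*
 * Example (illustrative, not from the original source): since the limits
 * above are read with TUNABLE_INT_DECL() from the kernel environment,
 * they are normally set at boot time, e.g. from /boot/loader.conf:
 *
 *	kern.ipc.nmbclusters="8192"
 *	kern.ipc.nmbufs="32768"
 *	kern.ipc.nmbcnt="8192"
 *
 * The numbers shown are arbitrary and only illustrate the syntax.
 */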

static void	m_reclaim __P((void));

/* Initial allocation numbers */
#define NCL_INIT	2
#define NMB_INIT	16
#define REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization done here.
 *
 * XXX: If we ever have system-specific map setups to do, then move them to
 *      machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(dummy)
	void *dummy;
{
	vm_offset_t maxaddr, mb_map_size;

	/*
	 * Set up mb_map, allocating the requested VM space.
	 */
	mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt
	    * sizeof(union mext_refcnt);
	mb_map_size = roundup2(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and set up locks for the lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mmbfree.m_mtx, "mbuf free list lock", MTX_DEF);
	mtx_init(&mclfree.m_mtx, "mcluster free list lock", MTX_DEF);
	mtx_init(&mcntfree.m_mtx, "m_ext counter free list lock", MTX_DEF);

	/*
	 * Initialize the mbuf subsystem's (sysctl-exported) statistics
	 * structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_enter(&mcntfree.m_mtx, MTX_DEF);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mcntfree.m_mtx, MTX_DEF);

	mtx_enter(&mmbfree.m_mtx, MTX_DEF);
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mmbfree.m_mtx, MTX_DEF);

	mtx_enter(&mclfree.m_mtx, MTX_DEF);
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mclfree.m_mtx, MTX_DEF);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(nmb, how)
	u_int nmb;
	int how;
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters.)
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	mtx_exit(&mcntfree.m_mtx, MTX_DEF);
	mtx_enter(&Giant, MTX_DEF);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		mtx_exit(&Giant, MTX_DEF);
		mtx_enter(&mcntfree.m_mtx, MTX_DEF); /* XXX: We must be
							holding it going out. */
		return (0);
	}
	mtx_exit(&Giant, MTX_DEF);
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	mtx_enter(&mcntfree.m_mtx, MTX_DEF);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}
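
/*
 * Usage sketch (illustrative, not from the original source): callers
 * bracket m_alloc_ref() with the counter free list mutex, exactly as
 * mbinit() does above ("n" is a hypothetical count):
 *
 *	mtx_enter(&mcntfree.m_mtx, MTX_DEF);
 *	if (m_alloc_ref(n, M_TRYWAIT) == 0)
 *		... handle allocation failure ...
 *	mtx_exit(&mcntfree.m_mtx, MTX_DEF);
 *
 * m_alloc_ref() drops and re-acquires the lock internally around
 * kmem_malloc(), but it is always held again on return.
 */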

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(nmb, how)
	register int nmb;
	int how;
{
	register caddr_t p;
	register int i;
	int nbytes;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) {
		/*
		 * Needs to be atomic as we may be incrementing it
		 * while holding another mutex, like mclfree. In other
		 * words, m_drops is not reserved solely for mbufs,
		 * but is also available for clusters.
		 */
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	nbytes = round_page(nmb * MSIZE);

	/* XXX: The letting go of the mmbfree lock here may eventually
	   be moved to only be done for M_TRYWAIT calls to kmem_malloc() */
	mtx_exit(&mmbfree.m_mtx, MTX_DEF);
	mtx_enter(&Giant, MTX_DEF);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
	if (p == 0 && how == M_TRYWAIT) {
		atomic_add_long(&mbstat.m_wait, 1);
		p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
	}
	mtx_exit(&Giant, MTX_DEF);
	mtx_enter(&mmbfree.m_mtx, MTX_DEF);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	nmb = nbytes / MSIZE;

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}

/*
 * Once mb_map has been exhausted, if the call to the allocation macros
 * (or, in some cases, functions) was made with M_TRYWAIT, then it is
 * necessary to rely solely on reclaimed mbufs.
 *
 * Here we ask the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time.
 *
 * Must be called with the mmbfree mutex held, and we will probably end
 * up recursing into that lock from some of the drain routines, but
 * this should be okay, as long as we don't block there, or attempt
 * to allocate from them (theoretically impossible).
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 */
	m_reclaim();
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		m_mballoc_wid++;
		if (msleep(&m_mballoc_wid, &mmbfree.m_mtx, PVM, "mballc",
		    mbuf_wait) == EWOULDBLOCK)
			m_mballoc_wid--;

		/*
		 * Try again (one last time).
		 *
		 * We retry to fetch _even_ if the sleep timed out. This
		 * is left this way, purposely, in the [unlikely] case
		 * that an mbuf was freed but the sleep was not awoken
		 * in time.
		 *
		 * If the sleep didn't time out (i.e. we got woken up) then
		 * we have the lock so we just grab an mbuf, hopefully.
		 */
		_MGET(p, M_DONTWAIT);
	}

	/* If we waited and got something... */
	if (p != NULL) {
		atomic_add_long(&mbstat.m_wait, 1);
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid);
	} else
		atomic_add_long(&mbstat.m_drops, 1);

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(ncl, how)
	register int ncl;
	int how;
{
	register caddr_t p;
	register int i;
	int npg;

	/*
	 * If the map is now full, nothing will ever be freed to it.
	 * If we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) {
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	npg = ncl;
	mtx_exit(&mclfree.m_mtx, MTX_DEF);
	mtx_enter(&Giant, MTX_DEF);
	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
				 how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	mtx_exit(&Giant, MTX_DEF);
	ncl = ncl * PAGE_SIZE / MCLBYTES;
	mtx_enter(&mclfree.m_mtx, MTX_DEF);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL) {
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 */
	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * sleep for a designated amount of time (mbuf_wait) or until we're woken up
 * due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;

	m_clalloc_wid++;
	if (msleep(&m_clalloc_wid, &mclfree.m_mtx, PVM, "mclalc", mbuf_wait)
	    == EWOULDBLOCK)
		m_clalloc_wid--;

	/*
	 * Now that we think we've got something, try again.
	 */
	_MCLALLOC(p, M_DONTWAIT);

	/* If we waited and got something ... */
	if (p != NULL) {
		atomic_add_long(&mbstat.m_wait, 1);
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid);
	} else
		atomic_add_long(&mbstat.m_drops, 1);

	return (p);
}

/*
 * m_reclaim: drain protocols in hopes of freeing up some resources...
 *
 * Should be called with the mmbfree.m_mtx mutex held. We will most likely
 * recursively grab it from within some drain routines, but that's okay,
 * as the mutex will never be completely released until we let go of it
 * after our m_reclaim() is over.
 *
 * Note: Drain routines are only allowed to free mbufs (and mclusters,
 *	 as a consequence, if need be). They are not allowed to allocate
 *	 new ones (that would defeat the purpose, anyway).
 */
static void
m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

struct mbuf *
m_gethdr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	if (m == 0)
		return (0);
	bzero(mtod(m, caddr_t), MLEN);
	return (m);
}
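
/*
 * Usage sketch (illustrative, not from the original source): callers of
 * these routines, or of the underlying MGET()/MGETHDR() macros, must
 * handle failure, since an M_DONTWAIT allocation may return NULL:
 *
 *	struct mbuf *m;
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */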

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	register struct mbuf *m;
{
	register struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We do need to check non-first mbufs, since some existing
		 * code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	register struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
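
/*
 * Usage sketch (illustrative, not from the original source): most code
 * reaches m_prepend() through the M_PREPEND() macro, which falls back to
 * this routine only when the chain has no leading space ("struct myhdr"
 * is a hypothetical protocol header):
 *
 *	M_PREPEND(m, sizeof(struct myhdr), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */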

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
#define MCFail (mbstat.m_mcfail)

struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		atomic_add_long(&MCFail, 1);
	return (top);
nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return (0);
}
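
/*
 * Usage sketch (illustrative, not from the original source): take a
 * read-only copy of a whole chain, e.g. for retransmission, leaving the
 * original intact; any clusters are shared by reference, not copied:
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		... handle allocation failure ...
 */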

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copypacket(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return 0;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	register struct mbuf *m;
	register int off;
	register int len;
	caddr_t cp;
{
	register unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
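
/*
 * Usage sketch (illustrative, not from the original source): copy a
 * header that may span several mbufs into a local buffer before looking
 * at it ("struct myhdr" is hypothetical):
 *
 *	struct myhdr hdr;
 *
 *	m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr);
 */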

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (0);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return (0);
}
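
/*
 * Usage sketch (illustrative, not from the original source): when the
 * copy will be written to, use m_dup() rather than m_copypacket(), since
 * the latter shares cluster storage with the original chain:
 *
 *	n = m_dup(m, M_DONTWAIT);
 *	if (n == NULL)
 *		... handle allocation failure ...
 */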

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	register struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	register int len = req_len;
	register struct mbuf *m;
	register int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}
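
/*
 * Usage sketch (illustrative, not from the original source): a positive
 * length trims bytes from the head of the chain, a negative one from the
 * tail ("hdrlen" and "padlen" are hypothetical):
 *
 *	m_adj(m, hdrlen);	strip a header from the front
 *	m_adj(m, -padlen);	drop trailing padding
 */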

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
#define MPFail (mbstat.m_mpfail)

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	atomic_add_long(&MPFail, 1);
	return (0);
}
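
/*
 * Usage sketch (illustrative, not from the original source): the classic
 * use of m_pullup() is making a header contiguous before casting it with
 * mtod(); on failure the chain is already freed, so it must not be
 * touched again:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */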

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
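
/*
 * Usage sketch (illustrative, not from the original source): split off
 * everything past a record boundary, leaving the first "reclen" bytes
 * (a hypothetical length) in m0:
 *
 *	n = m_split(m0, reclen, M_TRYWAIT);
 *	if (n == NULL)
 *		... m0 should still be intact; handle the failure ...
 */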
/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((char *from, caddr_t to, u_int len));
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
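
/*
 * Usage sketch (illustrative, not from the original source): a driver's
 * receive path can use m_devget() to copy a frame out of board memory;
 * passing a NULL copy function falls back to bcopy() ("bufp" and
 * "framelen" are hypothetical driver state):
 *
 *	m = m_devget(bufp, framelen, 0, ifp, NULL);
 *	if (m == NULL)
 *		... drop the frame ...
 */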

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct	mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
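
/*
 * Usage sketch (illustrative, not from the original source): overwrite
 * bytes at a given offset in an existing chain; the chain is extended
 * with zeroed mbufs if it is too short, though extension can fail
 * silently under M_DONTWAIT ("hdr" and "off" are hypothetical):
 *
 *	m_copyback(m, off, sizeof(hdr), (caddr_t)&hdr);
 */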

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}
1188