uipc_mbuf.c revision 76166
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/uipc_mbuf.c 76166 2001-05-01 08:13:21Z markm $
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit(void *);
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	nmbcnt;
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;

/*
 * freelist header structures...
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
struct mtx	mbuf_mtx;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
	   &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
	   sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
	   "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
	   "Maximum number of ext_buf counters available");

#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif

TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);
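
/*
 * Example (illustrative sketch, not prescribed by this file): these
 * tunables are read from the kernel environment at boot, so they can
 * typically be overridden from the loader, e.g. in /boot/loader.conf:
 *
 *	kern.ipc.nmbclusters="4096"
 *	kern.ipc.nmbufs="16384"
 *
 * The numbers shown are arbitrary placeholders; suitable values depend
 * on the machine and workload.
 */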

static void	m_reclaim(void);

/* Initial allocation numbers */
#define NCL_INIT	2
#define NMB_INIT	16
#define REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization done here.
 *
 * XXX: If ever we have system specific map setups to do, then move them to
 *      machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(void *dummy)
{
	vm_offset_t maxaddr;
	vm_size_t mb_map_size;

	/*
	 * Setup the mb_map, allocate requested VM space.
	 */
	mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt));
	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and setup locks for lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_unlock(&mbuf_mtx);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(u_int nmb, int how)
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		if (1 /* XXX: how == M_TRYWAIT */)
			mtx_lock(&mbuf_mtx);
		return (0);
	}
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(int nmb, int how)
{
	caddr_t p;
	int i;
	int nbytes;

	nbytes = round_page(nmb * MSIZE);
	nmb = nbytes / MSIZE;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
		M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */) {
		mtx_lock(&mbuf_mtx);
		if (p == NULL)
			mbstat.m_wait++;
	}

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}

/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we ask the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time, at most.
 *
 * Must be called with the mmbfree mutex held.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines. Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (See comment above m_reclaim()).
	 */
	mtx_unlock(&mbuf_mtx);
	m_reclaim();

	mtx_lock(&mbuf_mtx);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		int retval;

		m_mballoc_wid++;
		retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
		    mbuf_wait);
		m_mballoc_wid--;

		/*
		 * If we got signaled (i.e. didn't time out), allocate.
		 */
		if (retval == 0)
			_MGET(p, M_DONTWAIT);
	}

	if (p != NULL) {
		mbstat.m_wait++;
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
	}

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(int ncl, int how)
{
	caddr_t p;
	int i;
	int npg_sz;

	npg_sz = round_page(ncl * MCLBYTES);
	ncl = npg_sz / MCLBYTES;

	/*
	 * If the map is now full (nothing will ever be freed to it) or
	 * we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
		return (0);

	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
				 how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * block on a cv for a designated amount of time (mbuf_wait) or until we're
 * signaled due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;
	int retval;

	m_clalloc_wid++;
	retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
	m_clalloc_wid--;

	/*
	 * Now that we think we've got something, try again.
	 */
	if (retval == 0)
		_MCLALLOC(p, M_DONTWAIT);

	if (p != NULL) {
		mbstat.m_wait++;
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
	}

	return (p);
}

/*
 * m_reclaim: drain protocols in hopes to free up some resources...
 *
 * XXX: No locks should be held going in here. The drain routines have
 * to presently acquire some locks which raises the possibility of lock
 * order violation if we're holding any mutex that is acquired in
 * reverse order relative to one of the locks in the drain routines.
 */
static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(curproc) == 0,
	    ("m_reclaim called with locks held"));
#endif

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * Some of these are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	return (m);
}
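
/*
 * Example (illustrative sketch): a caller outside the performance-critical
 * paths can use the function form and simply check for failure, e.g.
 *
 *	struct mbuf *m;
 *
 *	m = m_get(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * Critical paths use the MGET() macro directly, which is all m_get()
 * expands to above.
 */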

struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	if (m != NULL)
		bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

/*
 * struct mbuf *
 * m_getm(m, len, how, type)
 *
 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain. If m is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 *
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, int type)
{
	struct mbuf *top, *tail, *mp, *mtail = NULL;

	KASSERT(len >= 0, ("len is < 0 in m_getm"));

	MGET(mp, how, type);
	if (mp == NULL)
		return (NULL);
	else if (len > MINCLSIZE) {
		MCLGET(mp, how);
		if ((mp->m_flags & M_EXT) == 0) {
			m_free(mp);
			return (NULL);
		}
	}
	mp->m_len = 0;
	len -= M_TRAILINGSPACE(mp);

	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		m = mp;

	top = tail = mp;
	while (len > 0) {
		MGET(mp, how, type);
		if (mp == NULL)
			goto failed;

		tail->m_next = mp;
		tail = mp;
		if (len > MINCLSIZE) {
			MCLGET(mp, how);
			if ((mp->m_flags & M_EXT) == 0)
				goto failed;
		}

		mp->m_len = 0;
		len -= M_TRAILINGSPACE(mp);
	}

	if (mtail != NULL)
		mtail->m_next = top;
	return (m);

failed:
	m_freem(top);
	return (NULL);
}
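
/*
 * Example (illustrative sketch): allocating a fresh chain with room for
 * "size" bytes of data might look like
 *
 *	struct mbuf *m;
 *
 *	m = m_getm(NULL, size, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * where "size" is a caller-supplied byte count.  Passing an existing chain
 * as the first argument instead appends the new mbufs/clusters to it, as
 * described in the comment above m_getm().
 */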

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We do need to check non-first mbufs, since some existing
		 * code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
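
/*
 * Example (illustrative sketch): protocol code normally goes through the
 * M_PREPEND() macro, which only falls back to m_prepend() when the chain
 * has no leading space:
 *
 *	M_PREPEND(m, hdrlen, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * where "hdrlen" is a placeholder for the size of the header being added.
 * Note that on failure the original chain has already been freed.
 */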

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL) {
		mtx_lock(&mbuf_mtx);
		mbstat.m_mcfail++;
		mtx_unlock(&mbuf_mtx);
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
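
/*
 * Example (illustrative sketch): a transmit path that must keep the
 * original chain queued (e.g. for retransmission) sends a copy instead:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, off, len, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *
 * Because cluster data is shared (only reference counts are bumped), the
 * copy must be treated as read-only; use m_dup() below when a writable
 * copy is needed.
 */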

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
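
/*
 * Example (illustrative sketch): extracting a fixed-size header into a
 * local structure, regardless of how the bytes are split across mbufs:
 *
 *	struct some_hdr sh;
 *
 *	if (m->m_pkthdr.len < sizeof(sh))
 *		return (EINVAL);
 *	m_copydata(m, 0, sizeof(sh), (caddr_t)&sh);
 *
 * "struct some_hdr" is a placeholder for whatever header the caller is
 * parsing.
 */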

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
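
/*
 * Example (illustrative sketch): code that needs to modify packet contents
 * without disturbing the original chain (which may still be referenced
 * elsewhere) deep-copies it first:
 *
 *	struct mbuf *copy;
 *
 *	copy = m_dup(m, M_DONTWAIT);
 *	if (copy == NULL)
 *		return (ENOBUFS);
 *
 * Unlike m_copypacket(), cluster contents are copied as well, so the
 * result is safe to write to.
 */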

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next) ->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}
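
/*
 * Example (illustrative sketch): the usual pattern in protocol input
 * routines is to make the header contiguous before casting the data
 * pointer:
 *
 *	if (m->m_len < sizeof(struct some_hdr) &&
 *	    (m = m_pullup(m, sizeof(struct some_hdr))) == NULL)
 *		return;
 *	sh = mtod(m, struct some_hdr *);
 *
 * On failure the chain has already been freed, so the old pointer must not
 * be used.  "struct some_hdr" is a placeholder type.
 */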

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
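
/*
 * Example (illustrative sketch): splitting a packet at "mtu" bytes, e.g.
 * as a step in fragmentation:
 *
 *	struct mbuf *tail;
 *
 *	tail = m_split(m, mtu, M_DONTWAIT);
 *	if (tail == NULL)
 *		return (ENOBUFS);
 *
 * After a successful call, m holds the first mtu bytes and tail holds the
 * remainder of the original chain.
 */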
/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
	 void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == NULL && len +
				    max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
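
/*
 * Example (illustrative sketch): a driver receive path with a contiguous
 * frame buffer might build a chain and hand it up the stack roughly like
 * this:
 *
 *	struct mbuf *m;
 *
 *	m = m_devget(rxbuf, framelen, 0, ifp, NULL);
 *	if (m == NULL)
 *		return;
 *
 * Passing NULL for the copy function makes m_devget() fall back to
 * bcopy(); "rxbuf" and "framelen" stand in for driver-specific state.
 */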

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min (m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
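
/*
 * Example (illustrative sketch): overwriting a field at a known offset in
 * a packet, such as patching in a checksum that has already been computed
 * into "sum":
 *
 *	u_int16_t sum;
 *
 *	m_copyback(m, ckoff, sizeof(sum), (caddr_t)&sum);
 *
 * "ckoff" is a placeholder for the offset of the field being rewritten.
 * Since allocation failures while extending the chain are silent, callers
 * that depend on the data actually landing should check lengths afterward.
 */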

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}