uipc_mbuf.c revision 71089
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 331541Srgrimes * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 3450477Speter * $FreeBSD: head/sys/kern/uipc_mbuf.c 71089 2001-01-16 01:53:13Z bmilekic $ 351541Srgrimes */ 361541Srgrimes 3748579Smsmith#include "opt_param.h" 381541Srgrimes#include <sys/param.h> 391541Srgrimes#include <sys/systm.h> 4032036Sbde#include <sys/malloc.h> 411541Srgrimes#include <sys/mbuf.h> 4267365Sjhb#include <sys/mutex.h> 431541Srgrimes#include <sys/kernel.h> 4423081Swollman#include <sys/sysctl.h> 451541Srgrimes#include <sys/domain.h> 461541Srgrimes#include <sys/protosw.h> 471541Srgrimes#include <vm/vm.h> 489759Sbde#include <vm/vm_kern.h> 4912662Sdg#include <vm/vm_extern.h> 501541Srgrimes 5110653Sdgstatic void mbinit __P((void *)); 5210358SjulianSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) 5310358Sjulian 549759Sbdestruct mbuf *mbutl; 559759Sbdestruct mbstat mbstat; 5663203Salfredu_long mbtypes[MT_NTYPES]; 579759Sbdeint max_linkhdr; 589759Sbdeint max_protohdr; 599759Sbdeint max_hdr; 609759Sbdeint max_datalen; 6148579Smsmithint nmbclusters; 6248579Smsmithint nmbufs; 6367144Sbmilekicint nmbcnt; 6466475Sbmilekicu_long m_mballoc_wid = 0; 6566475Sbmilekicu_long m_clalloc_wid = 0; 661541Srgrimes 6766475Sbmilekic/* 6866475Sbmilekic * freelist header structures... 6966475Sbmilekic * mbffree_lst, mclfree_lst, mcntfree_lst 7066475Sbmilekic */ 7166475Sbmilekicstruct mbffree_lst mmbfree; 7266475Sbmilekicstruct mclfree_lst mclfree; 7366475Sbmilekicstruct mcntfree_lst mcntfree; 7466475Sbmilekic 7566475Sbmilekic/* 7666475Sbmilekic * sysctl(8) exported objects 7766475Sbmilekic */ 7844078SdfrSYSCTL_DECL(_kern_ipc); 7923081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, 8023081Swollman &max_linkhdr, 0, ""); 8123081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, 8223081Swollman &max_protohdr, 0, ""); 8323081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); 8423081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, 8523081Swollman &max_datalen, 0, ""); 8654478SgreenSYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, 8754478Sgreen &mbuf_wait, 0, ""); 8864048SalfredSYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, ""); 8963203SalfredSYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes, 9063203Salfred sizeof(mbtypes), "LU", ""); 9148579SmsmithSYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, 9255171Smsmith &nmbclusters, 0, "Maximum number of mbuf clusters available"); 9355171SmsmithSYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, 9455171Smsmith "Maximum number of mbufs available"); 9567144SbmilekicSYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0, 9667144Sbmilekic "Maximum number of ext_buf counters available"); 9748579Smsmith#ifndef NMBCLUSTERS 9848579Smsmith#define NMBCLUSTERS (512 + MAXUSERS * 16) 9948579Smsmith#endif 10048579SmsmithTUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters); 10155171SmsmithTUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs); 10267144SbmilekicTUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt); 10323081Swollman 10412819Sphkstatic void m_reclaim __P((void)); 10512819Sphk 10666475Sbmilekic/* Initial allocation numbers */ 10764837Sdwmalone#define NCL_INIT 2 10815744Sphk#define NMB_INIT 16 10966475Sbmilekic#define REF_INIT NMBCLUSTERS 11015744Sphk 11166475Sbmilekic/* 11266475Sbmilekic * Full mbuf subsystem initialization done here. 11366475Sbmilekic * 11466475Sbmilekic * XXX: If ever we have system specific map setups to do, then move them to 11566475Sbmilekic * machdep.c - for now, there is no reason for this stuff to go there. 11666475Sbmilekic */ 11710358Sjulianstatic void 11812569Sbdembinit(dummy) 11912569Sbde void *dummy; 1201541Srgrimes{ 12166475Sbmilekic vm_offset_t maxaddr, mb_map_size; 1221541Srgrimes 12366475Sbmilekic /* 12466475Sbmilekic * Setup the mb_map, allocate requested VM space. 12566475Sbmilekic */ 12667144Sbmilekic mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt 12766475Sbmilekic * sizeof(union mext_refcnt); 12866475Sbmilekic mb_map_size = roundup2(mb_map_size, PAGE_SIZE); 12966475Sbmilekic mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, 13066475Sbmilekic mb_map_size); 13166475Sbmilekic /* XXX: mb_map->system_map = 1; */ 13264837Sdwmalone 13366475Sbmilekic /* 13466475Sbmilekic * Initialize the free list headers, and setup locks for lists. 13566475Sbmilekic */ 13666475Sbmilekic mmbfree.m_head = NULL; 13766475Sbmilekic mclfree.m_head = NULL; 13866475Sbmilekic mcntfree.m_head = NULL; 13966475Sbmilekic mtx_init(&mmbfree.m_mtx, "mbuf free list lock", MTX_DEF); 14066475Sbmilekic mtx_init(&mclfree.m_mtx, "mcluster free list lock", MTX_DEF); 14166475Sbmilekic mtx_init(&mcntfree.m_mtx, "m_ext counter free list lock", MTX_DEF); 14266475Sbmilekic 14366475Sbmilekic /* 14466475Sbmilekic * Initialize mbuf subsystem (sysctl exported) statistics structure. 14566475Sbmilekic */ 14623081Swollman mbstat.m_msize = MSIZE; 14723081Swollman mbstat.m_mclbytes = MCLBYTES; 14823081Swollman mbstat.m_minclsize = MINCLSIZE; 14923081Swollman mbstat.m_mlen = MLEN; 15023081Swollman mbstat.m_mhlen = MHLEN; 15123081Swollman 15266475Sbmilekic /* 15366475Sbmilekic * Perform some initial allocations. 15466475Sbmilekic */ 15566475Sbmilekic mtx_enter(&mcntfree.m_mtx, MTX_DEF); 15666475Sbmilekic if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0) 15764837Sdwmalone goto bad; 15866475Sbmilekic mtx_exit(&mcntfree.m_mtx, MTX_DEF); 15966475Sbmilekic 16066475Sbmilekic mtx_enter(&mmbfree.m_mtx, MTX_DEF); 16115689Swollman if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) 16215689Swollman goto bad; 16366475Sbmilekic mtx_exit(&mmbfree.m_mtx, MTX_DEF); 16466475Sbmilekic 16566475Sbmilekic mtx_enter(&mclfree.m_mtx, MTX_DEF); 1661541Srgrimes if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) 1671541Srgrimes goto bad; 16866475Sbmilekic mtx_exit(&mclfree.m_mtx, MTX_DEF); 16966475Sbmilekic 1701541Srgrimes return; 1711541Srgrimesbad: 17264837Sdwmalone panic("mbinit: failed to initialize mbuf subsystem!"); 1731541Srgrimes} 1741541Srgrimes 1751541Srgrimes/* 17664837Sdwmalone * Allocate at least nmb reference count structs and place them 17764837Sdwmalone * on the ref cnt free list. 17866475Sbmilekic * 17966475Sbmilekic * Must be called with the mcntfree lock held. 18064837Sdwmalone */ 18164837Sdwmaloneint 18266475Sbmilekicm_alloc_ref(nmb, how) 18364837Sdwmalone u_int nmb; 18466475Sbmilekic int how; 18564837Sdwmalone{ 18664837Sdwmalone caddr_t p; 18764837Sdwmalone u_int nbytes; 18864837Sdwmalone int i; 18964837Sdwmalone 19064837Sdwmalone /* 19164837Sdwmalone * We don't cap the amount of memory that can be used 19264837Sdwmalone * by the reference counters, like we do for mbufs and 19366475Sbmilekic * mbuf clusters. In fact, we're absolutely sure that we 19466475Sbmilekic * won't ever be going over our allocated space. We keep enough 19566475Sbmilekic * space in mb_map to accomodate maximum values of allocatable 19666475Sbmilekic * external buffers including, but not limited to, clusters. 19766475Sbmilekic * (That's also why we won't have to have wait routines for 19866475Sbmilekic * counters). 19966475Sbmilekic * 20066475Sbmilekic * If we're in here, we're absolutely certain to be returning 20166475Sbmilekic * succesfully, as long as there is physical memory to accomodate 20266475Sbmilekic * us. And if there isn't, but we're willing to wait, then 20366475Sbmilekic * kmem_malloc() will do the only waiting needed. 20464837Sdwmalone */ 20564837Sdwmalone 20664837Sdwmalone nbytes = round_page(nmb * sizeof(union mext_refcnt)); 20766475Sbmilekic mtx_exit(&mcntfree.m_mtx, MTX_DEF); 20871089Sbmilekic#ifdef WITNESS 20971089Sbmilekic /* 21071089Sbmilekic * XXX: Make sure we don't create lock order problems. 21171089Sbmilekic * XXX: We'll grab Giant, but for that to be OK, make sure 21271089Sbmilekic * XXX: that either Giant is already held OR make sure that 21371089Sbmilekic * XXX: no other locks are held coming in. 21471089Sbmilekic * XXX: Revisit once most of the net stuff gets locks added. 21571089Sbmilekic */ 21671089Sbmilekic KASSERT(mtx_owned(&Giant) || witness_list(CURPROC) == 0, 21771089Sbmilekic ("m_alloc_ref: Giant must be owned or no locks held")); 21871089Sbmilekic#endif 21966475Sbmilekic mtx_enter(&Giant, MTX_DEF); 22070254Sbmilekic if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ? 22170254Sbmilekic M_WAITOK : M_NOWAIT)) == NULL) { 22266475Sbmilekic mtx_exit(&Giant, MTX_DEF); 22371089Sbmilekic mtx_enter(&mcntfree.m_mtx, MTX_DEF); 22464837Sdwmalone return (0); 22566475Sbmilekic } 22666475Sbmilekic mtx_exit(&Giant, MTX_DEF); 22764837Sdwmalone nmb = nbytes / sizeof(union mext_refcnt); 22864837Sdwmalone 22966475Sbmilekic /* 23066475Sbmilekic * We don't let go of the mutex in order to avoid a race. 23166475Sbmilekic * It is up to the caller to let go of the mutex. 23266475Sbmilekic */ 23366475Sbmilekic mtx_enter(&mcntfree.m_mtx, MTX_DEF); 23464837Sdwmalone for (i = 0; i < nmb; i++) { 23566475Sbmilekic ((union mext_refcnt *)p)->next_ref = mcntfree.m_head; 23666475Sbmilekic mcntfree.m_head = (union mext_refcnt *)p; 23764837Sdwmalone p += sizeof(union mext_refcnt); 23864837Sdwmalone mbstat.m_refree++; 23964837Sdwmalone } 24064837Sdwmalone mbstat.m_refcnt += nmb; 24164837Sdwmalone 24264837Sdwmalone return (1); 24364837Sdwmalone} 24464837Sdwmalone 24564837Sdwmalone/* 24615689Swollman * Allocate at least nmb mbufs and place on mbuf free list. 24766475Sbmilekic * 24866475Sbmilekic * Must be called with the mmbfree lock held. 24915689Swollman */ 25015689Swollmanint 25132036Sbdem_mballoc(nmb, how) 25215689Swollman register int nmb; 25332036Sbde int how; 25415689Swollman{ 25515689Swollman register caddr_t p; 25615689Swollman register int i; 25715689Swollman int nbytes; 25815689Swollman 25954478Sgreen /* 26066475Sbmilekic * If we've hit the mbuf limit, stop allocating from mb_map. 26166475Sbmilekic * Also, once we run out of map space, it will be impossible to 26266475Sbmilekic * get any more (nothing is ever freed back to the map). 26355171Smsmith */ 26466475Sbmilekic if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) { 26566475Sbmilekic /* 26666475Sbmilekic * Needs to be atomic as we may be incrementing it 26766475Sbmilekic * while holding another mutex, like mclfree. In other 26866475Sbmilekic * words, m_drops is not reserved solely for mbufs, 26966475Sbmilekic * but is also available for clusters. 27066475Sbmilekic */ 27166475Sbmilekic atomic_add_long(&mbstat.m_drops, 1); 27255171Smsmith return (0); 27366475Sbmilekic } 27455171Smsmith 27566475Sbmilekic nbytes = round_page(nmb * MSIZE); 27615689Swollman 27766475Sbmilekic mtx_exit(&mmbfree.m_mtx, MTX_DEF); 27871089Sbmilekic#ifdef WITNESS 27971089Sbmilekic /* 28071089Sbmilekic * XXX: Make sure we don't create lock order problems. 28171089Sbmilekic * XXX: We'll grab Giant, but for that to be OK, make sure 28271089Sbmilekic * XXX: that either Giant is already held OR make sure that 28371089Sbmilekic * XXX: no other locks are held coming in. 28471089Sbmilekic * XXX: Revisit once most of the net stuff gets locks added. 28571089Sbmilekic */ 28671089Sbmilekic KASSERT(mtx_owned(&Giant) || witness_list(CURPROC) == 0, 28771089Sbmilekic ("m_mballoc: Giant must be owned or no locks held")); 28871089Sbmilekic#endif 28966475Sbmilekic mtx_enter(&Giant, MTX_DEF); 29022899Swollman p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT); 29170254Sbmilekic if (p == 0 && how == M_TRYWAIT) { 29266475Sbmilekic atomic_add_long(&mbstat.m_wait, 1); 29322899Swollman p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK); 29422899Swollman } 29566475Sbmilekic mtx_exit(&Giant, MTX_DEF); 29666475Sbmilekic mtx_enter(&mmbfree.m_mtx, MTX_DEF); 29722899Swollman 29815689Swollman /* 29966475Sbmilekic * Either the map is now full, or `how' is M_DONTWAIT and there 30015689Swollman * are no pages left. 30115689Swollman */ 30215689Swollman if (p == NULL) 30315689Swollman return (0); 30415689Swollman 30515689Swollman nmb = nbytes / MSIZE; 30666475Sbmilekic 30766475Sbmilekic /* 30866475Sbmilekic * We don't let go of the mutex in order to avoid a race. 30966475Sbmilekic * It is up to the caller to let go of the mutex when done 31066475Sbmilekic * with grabbing the mbuf from the free list. 31166475Sbmilekic */ 31215689Swollman for (i = 0; i < nmb; i++) { 31366475Sbmilekic ((struct mbuf *)p)->m_next = mmbfree.m_head; 31466475Sbmilekic mmbfree.m_head = (struct mbuf *)p; 31515689Swollman p += MSIZE; 31615689Swollman } 31715689Swollman mbstat.m_mbufs += nmb; 31863203Salfred mbtypes[MT_FREE] += nmb; 31915689Swollman return (1); 32015689Swollman} 32115689Swollman 32254478Sgreen/* 32354478Sgreen * Once the mb_map has been exhausted and if the call to the allocation macros 32470254Sbmilekic * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to 32570254Sbmilekic * rely solely on reclaimed mbufs. 32666475Sbmilekic * 32766475Sbmilekic * Here we request for the protocols to free up some resources and, if we 32866475Sbmilekic * still cannot get anything, then we wait for an mbuf to be freed for a 32954478Sgreen * designated (mbuf_wait) time. 33066475Sbmilekic * 33170858Sbmilekic * Must be called with the mmbfree mutex held. 33254478Sgreen */ 33354478Sgreenstruct mbuf * 33466475Sbmilekicm_mballoc_wait(void) 33554478Sgreen{ 33666475Sbmilekic struct mbuf *p = NULL; 33754478Sgreen 33854478Sgreen /* 33966475Sbmilekic * See if we can drain some resources out of the protocols. 34070858Sbmilekic * We drop the mmbfree mutex to avoid recursing into it in some of 34170858Sbmilekic * the drain routines. Clearly, we're faced with a race here because 34270858Sbmilekic * once something is freed during the drain, it may be grabbed right 34370858Sbmilekic * from under us by some other thread. But we accept this possibility 34470858Sbmilekic * in order to avoid a potentially large lock recursion and, more 34570858Sbmilekic * importantly, to avoid a potential lock order reversal which may 34670858Sbmilekic * result in deadlock (See comment above m_reclaim()). 34754478Sgreen */ 34870858Sbmilekic mtx_exit(&mmbfree.m_mtx, MTX_DEF); 34966475Sbmilekic m_reclaim(); 35070858Sbmilekic 35170858Sbmilekic mtx_enter(&mmbfree.m_mtx, MTX_DEF); 35266475Sbmilekic _MGET(p, M_DONTWAIT); 35354478Sgreen 35466475Sbmilekic if (p == NULL) { 35566475Sbmilekic m_mballoc_wid++; 35666475Sbmilekic if (msleep(&m_mballoc_wid, &mmbfree.m_mtx, PVM, "mballc", 35766475Sbmilekic mbuf_wait) == EWOULDBLOCK) 35866475Sbmilekic m_mballoc_wid--; 35966475Sbmilekic 36066475Sbmilekic /* 36166475Sbmilekic * Try again (one last time). 36266475Sbmilekic * 36366475Sbmilekic * We retry to fetch _even_ if the sleep timed out. This 36466475Sbmilekic * is left this way, purposely, in the [unlikely] case 36566475Sbmilekic * that an mbuf was freed but the sleep was not awoken 36666475Sbmilekic * in time. 36766475Sbmilekic * 36866475Sbmilekic * If the sleep didn't time out (i.e. we got woken up) then 36966475Sbmilekic * we have the lock so we just grab an mbuf, hopefully. 37066475Sbmilekic */ 37166475Sbmilekic _MGET(p, M_DONTWAIT); 37254478Sgreen } 37354478Sgreen 37466475Sbmilekic /* If we waited and got something... */ 37566475Sbmilekic if (p != NULL) { 37666475Sbmilekic atomic_add_long(&mbstat.m_wait, 1); 37766475Sbmilekic if (mmbfree.m_head != NULL) 37866475Sbmilekic MBWAKEUP(m_mballoc_wid); 37966475Sbmilekic } else 38066475Sbmilekic atomic_add_long(&mbstat.m_drops, 1); 38122671Swollman 38266475Sbmilekic return (p); 38322671Swollman} 38422671Swollman 38515689Swollman/* 3861541Srgrimes * Allocate some number of mbuf clusters 3871541Srgrimes * and place on cluster free list. 38866475Sbmilekic * 38966475Sbmilekic * Must be called with the mclfree lock held. 3901541Srgrimes */ 3911549Srgrimesint 39232036Sbdem_clalloc(ncl, how) 3931541Srgrimes register int ncl; 39432036Sbde int how; 3951541Srgrimes{ 3961541Srgrimes register caddr_t p; 3971541Srgrimes register int i; 3981541Srgrimes int npg; 3991541Srgrimes 4007066Sdg /* 40166475Sbmilekic * If the map is now full (nothing will ever be freed to it). 40255171Smsmith * If we've hit the mcluster number limit, stop allocating from 40366475Sbmilekic * mb_map. 40455171Smsmith */ 40566475Sbmilekic if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) { 40666475Sbmilekic atomic_add_long(&mbstat.m_drops, 1); 40755171Smsmith return (0); 40855171Smsmith } 40955171Smsmith 41015543Sphk npg = ncl; 41166475Sbmilekic mtx_exit(&mclfree.m_mtx, MTX_DEF); 41271089Sbmilekic#ifdef WITNESS 41371089Sbmilekic /* 41471089Sbmilekic * XXX: Make sure we don't create lock order problems. 41571089Sbmilekic * XXX: We'll grab Giant, but for that to be OK, make sure 41671089Sbmilekic * XXX: that either Giant is already held OR make sure that 41771089Sbmilekic * XXX: no other locks are held coming in. 41871089Sbmilekic * XXX: Revisit once most of the net stuff gets locks added. 41971089Sbmilekic */ 42071089Sbmilekic KASSERT(mtx_owned(&Giant) || witness_list(CURPROC) == 0, 42171089Sbmilekic ("m_clalloc: Giant must be owned or no locks held")); 42271089Sbmilekic#endif 42366475Sbmilekic mtx_enter(&Giant, MTX_DEF); 42421737Sdg p = (caddr_t)kmem_malloc(mb_map, ctob(npg), 42570254Sbmilekic how == M_TRYWAIT ? M_WAITOK : M_NOWAIT); 42666475Sbmilekic mtx_exit(&Giant, MTX_DEF); 42722671Swollman ncl = ncl * PAGE_SIZE / MCLBYTES; 42866475Sbmilekic mtx_enter(&mclfree.m_mtx, MTX_DEF); 42966475Sbmilekic 4307066Sdg /* 43166475Sbmilekic * Either the map is now full, or `how' is M_DONTWAIT and there 4327066Sdg * are no pages left. 4337066Sdg */ 43422899Swollman if (p == NULL) { 43566475Sbmilekic atomic_add_long(&mbstat.m_drops, 1); 4361541Srgrimes return (0); 43722899Swollman } 4387066Sdg 43966475Sbmilekic /* 44066475Sbmilekic * We don't let go of the mutex in order to avoid a race. 44166475Sbmilekic */ 4421541Srgrimes for (i = 0; i < ncl; i++) { 44366475Sbmilekic ((union mcluster *)p)->mcl_next = mclfree.m_head; 44466475Sbmilekic mclfree.m_head = (union mcluster *)p; 4451541Srgrimes p += MCLBYTES; 4461541Srgrimes mbstat.m_clfree++; 4471541Srgrimes } 4481541Srgrimes mbstat.m_clusters += ncl; 4491541Srgrimes return (1); 4501541Srgrimes} 4511541Srgrimes 4521541Srgrimes/* 45354478Sgreen * Once the mb_map submap has been exhausted and the allocation is called with 45470254Sbmilekic * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will 45554478Sgreen * sleep for a designated amount of time (mbuf_wait) or until we're woken up 45654478Sgreen * due to sudden mcluster availability. 45766475Sbmilekic * 45866475Sbmilekic * Must be called with the mclfree lock held. 45954478Sgreen */ 46054478Sgreencaddr_t 46154478Sgreenm_clalloc_wait(void) 46254478Sgreen{ 46366475Sbmilekic caddr_t p = NULL; 46454478Sgreen 46554478Sgreen m_clalloc_wid++; 46666475Sbmilekic if (msleep(&m_clalloc_wid, &mclfree.m_mtx, PVM, "mclalc", mbuf_wait) 46766475Sbmilekic == EWOULDBLOCK) 46854478Sgreen m_clalloc_wid--; 46954478Sgreen 47054478Sgreen /* 47166475Sbmilekic * Now that we (think) that we've got something, try again. 47254478Sgreen */ 47364837Sdwmalone _MCLALLOC(p, M_DONTWAIT); 47454478Sgreen 47566475Sbmilekic /* If we waited and got something ... */ 47666475Sbmilekic if (p != NULL) { 47766475Sbmilekic atomic_add_long(&mbstat.m_wait, 1); 47866475Sbmilekic if (mclfree.m_head != NULL) 47966475Sbmilekic MBWAKEUP(m_clalloc_wid); 48066475Sbmilekic } else 48166475Sbmilekic atomic_add_long(&mbstat.m_drops, 1); 48254478Sgreen 48354478Sgreen return (p); 48454478Sgreen} 48554478Sgreen 48654478Sgreen/* 48766475Sbmilekic * m_reclaim: drain protocols in hopes to free up some resources... 48866475Sbmilekic * 48970858Sbmilekic * XXX: No locks should be held going in here. The drain routines have 49070858Sbmilekic * to presently acquire some locks which raises the possibility of lock 49170858Sbmilekic * order violation if we're holding any mutex if that mutex is acquired in 49270858Sbmilekic * reverse order relative to one of the locks in the drain routines. 4931541Srgrimes */ 49412819Sphkstatic void 4951541Srgrimesm_reclaim() 4961541Srgrimes{ 4971541Srgrimes register struct domain *dp; 4981541Srgrimes register struct protosw *pr; 4991541Srgrimes 50071089Sbmilekic#ifdef WITNESS 50171089Sbmilekic KASSERT(witness_list(CURPROC) == 0, 50271089Sbmilekic ("m_reclaim called with locks held")); 50371089Sbmilekic#endif 50471089Sbmilekic 5051541Srgrimes for (dp = domains; dp; dp = dp->dom_next) 5061541Srgrimes for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 5071541Srgrimes if (pr->pr_drain) 5081541Srgrimes (*pr->pr_drain)(); 5091541Srgrimes mbstat.m_drain++; 5101541Srgrimes} 5111541Srgrimes 5121541Srgrimes/* 5131541Srgrimes * Space allocation routines. 5141541Srgrimes * These are also available as macros 5151541Srgrimes * for critical paths. 5161541Srgrimes */ 5171541Srgrimesstruct mbuf * 51832036Sbdem_get(how, type) 51932036Sbde int how, type; 5201541Srgrimes{ 5211541Srgrimes register struct mbuf *m; 5221541Srgrimes 52332036Sbde MGET(m, how, type); 5241541Srgrimes return (m); 5251541Srgrimes} 5261541Srgrimes 5271541Srgrimesstruct mbuf * 52832036Sbdem_gethdr(how, type) 52932036Sbde int how, type; 5301541Srgrimes{ 5311541Srgrimes register struct mbuf *m; 5321541Srgrimes 53332036Sbde MGETHDR(m, how, type); 5341541Srgrimes return (m); 5351541Srgrimes} 5361541Srgrimes 5371541Srgrimesstruct mbuf * 53832036Sbdem_getclr(how, type) 53932036Sbde int how, type; 5401541Srgrimes{ 5411541Srgrimes register struct mbuf *m; 5421541Srgrimes 54332036Sbde MGET(m, how, type); 5441541Srgrimes if (m == 0) 5451541Srgrimes return (0); 5461541Srgrimes bzero(mtod(m, caddr_t), MLEN); 5471541Srgrimes return (m); 5481541Srgrimes} 5491541Srgrimes 5501541Srgrimesstruct mbuf * 5511541Srgrimesm_free(m) 5521541Srgrimes struct mbuf *m; 5531541Srgrimes{ 5541541Srgrimes register struct mbuf *n; 5551541Srgrimes 5561541Srgrimes MFREE(m, n); 5571541Srgrimes return (n); 5581541Srgrimes} 5591541Srgrimes 5601541Srgrimesvoid 5611541Srgrimesm_freem(m) 5621541Srgrimes register struct mbuf *m; 5631541Srgrimes{ 5641541Srgrimes register struct mbuf *n; 5651541Srgrimes 5661541Srgrimes if (m == NULL) 5671541Srgrimes return; 5681541Srgrimes do { 56962587Sitojun /* 57062587Sitojun * we do need to check non-first mbuf, since some of existing 57162587Sitojun * code does not call M_PREPEND properly. 57262587Sitojun * (example: call to bpf_mtap from drivers) 57362587Sitojun */ 57462587Sitojun if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) { 57562587Sitojun m_freem(m->m_pkthdr.aux); 57662587Sitojun m->m_pkthdr.aux = NULL; 57762587Sitojun } 5781541Srgrimes MFREE(m, n); 5793308Sphk m = n; 5803308Sphk } while (m); 5811541Srgrimes} 5821541Srgrimes 5831541Srgrimes/* 5841541Srgrimes * Mbuffer utility routines. 5851541Srgrimes */ 5861541Srgrimes 5871541Srgrimes/* 5881541Srgrimes * Lesser-used path for M_PREPEND: 5891541Srgrimes * allocate new mbuf to prepend to chain, 5901541Srgrimes * copy junk along. 5911541Srgrimes */ 5921541Srgrimesstruct mbuf * 5931541Srgrimesm_prepend(m, len, how) 5941541Srgrimes register struct mbuf *m; 5951541Srgrimes int len, how; 5961541Srgrimes{ 5971541Srgrimes struct mbuf *mn; 5981541Srgrimes 5991541Srgrimes MGET(mn, how, m->m_type); 6001541Srgrimes if (mn == (struct mbuf *)NULL) { 6011541Srgrimes m_freem(m); 6021541Srgrimes return ((struct mbuf *)NULL); 6031541Srgrimes } 6041541Srgrimes if (m->m_flags & M_PKTHDR) { 6051541Srgrimes M_COPY_PKTHDR(mn, m); 6061541Srgrimes m->m_flags &= ~M_PKTHDR; 6071541Srgrimes } 6081541Srgrimes mn->m_next = m; 6091541Srgrimes m = mn; 6101541Srgrimes if (len < MHLEN) 6111541Srgrimes MH_ALIGN(m, len); 6121541Srgrimes m->m_len = len; 6131541Srgrimes return (m); 6141541Srgrimes} 6151541Srgrimes 6161541Srgrimes/* 6171541Srgrimes * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 6181541Srgrimes * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 61970254Sbmilekic * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. 62054002Sarchie * Note that the copy is read-only, because clusters are not copied, 62154002Sarchie * only their reference counts are incremented. 6221541Srgrimes */ 62323081Swollman#define MCFail (mbstat.m_mcfail) 6241541Srgrimes 6251541Srgrimesstruct mbuf * 6261541Srgrimesm_copym(m, off0, len, wait) 6271541Srgrimes register struct mbuf *m; 6281541Srgrimes int off0, wait; 6291541Srgrimes register int len; 6301541Srgrimes{ 6311541Srgrimes register struct mbuf *n, **np; 6321541Srgrimes register int off = off0; 6331541Srgrimes struct mbuf *top; 6341541Srgrimes int copyhdr = 0; 6351541Srgrimes 63652201Salfred KASSERT(off >= 0, ("m_copym, negative off %d", off)); 63752201Salfred KASSERT(len >= 0, ("m_copym, negative len %d", len)); 6381541Srgrimes if (off == 0 && m->m_flags & M_PKTHDR) 6391541Srgrimes copyhdr = 1; 6401541Srgrimes while (off > 0) { 64152201Salfred KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); 6421541Srgrimes if (off < m->m_len) 6431541Srgrimes break; 6441541Srgrimes off -= m->m_len; 6451541Srgrimes m = m->m_next; 6461541Srgrimes } 6471541Srgrimes np = ⊤ 6481541Srgrimes top = 0; 6491541Srgrimes while (len > 0) { 6501541Srgrimes if (m == 0) { 65152201Salfred KASSERT(len == M_COPYALL, 65252201Salfred ("m_copym, length > size of mbuf chain")); 6531541Srgrimes break; 6541541Srgrimes } 6551541Srgrimes MGET(n, wait, m->m_type); 6561541Srgrimes *np = n; 6571541Srgrimes if (n == 0) 6581541Srgrimes goto nospace; 6591541Srgrimes if (copyhdr) { 6601541Srgrimes M_COPY_PKTHDR(n, m); 6611541Srgrimes if (len == M_COPYALL) 6621541Srgrimes n->m_pkthdr.len -= off0; 6631541Srgrimes else 6641541Srgrimes n->m_pkthdr.len = len; 6651541Srgrimes copyhdr = 0; 6661541Srgrimes } 6671541Srgrimes n->m_len = min(len, m->m_len - off); 6681541Srgrimes if (m->m_flags & M_EXT) { 6691541Srgrimes n->m_data = m->m_data + off; 6701541Srgrimes n->m_ext = m->m_ext; 6711541Srgrimes n->m_flags |= M_EXT; 67264837Sdwmalone MEXT_ADD_REF(m); 6731541Srgrimes } else 6741541Srgrimes bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), 6751541Srgrimes (unsigned)n->m_len); 6761541Srgrimes if (len != M_COPYALL) 6771541Srgrimes len -= n->m_len; 6781541Srgrimes off = 0; 6791541Srgrimes m = m->m_next; 6801541Srgrimes np = &n->m_next; 6811541Srgrimes } 6821541Srgrimes if (top == 0) 68366475Sbmilekic atomic_add_long(&MCFail, 1); 6841541Srgrimes return (top); 6851541Srgrimesnospace: 6861541Srgrimes m_freem(top); 68766475Sbmilekic atomic_add_long(&MCFail, 1); 6881541Srgrimes return (0); 6891541Srgrimes} 6901541Srgrimes 6911541Srgrimes/* 69215689Swollman * Copy an entire packet, including header (which must be present). 69315689Swollman * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 69454002Sarchie * Note that the copy is read-only, because clusters are not copied, 69554002Sarchie * only their reference counts are incremented. 69615689Swollman */ 69715689Swollmanstruct mbuf * 69815689Swollmanm_copypacket(m, how) 69915689Swollman struct mbuf *m; 70015689Swollman int how; 70115689Swollman{ 70215689Swollman struct mbuf *top, *n, *o; 70315689Swollman 70415689Swollman MGET(n, how, m->m_type); 70515689Swollman top = n; 70615689Swollman if (!n) 70715689Swollman goto nospace; 70815689Swollman 70915689Swollman M_COPY_PKTHDR(n, m); 71015689Swollman n->m_len = m->m_len; 71115689Swollman if (m->m_flags & M_EXT) { 71215689Swollman n->m_data = m->m_data; 71315689Swollman n->m_ext = m->m_ext; 71415689Swollman n->m_flags |= M_EXT; 71564837Sdwmalone MEXT_ADD_REF(m); 71615689Swollman } else { 71715689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 71815689Swollman } 71915689Swollman 72015689Swollman m = m->m_next; 72115689Swollman while (m) { 72215689Swollman MGET(o, how, m->m_type); 72315689Swollman if (!o) 72415689Swollman goto nospace; 72515689Swollman 72615689Swollman n->m_next = o; 72715689Swollman n = n->m_next; 72815689Swollman 72915689Swollman n->m_len = m->m_len; 73015689Swollman if (m->m_flags & M_EXT) { 73115689Swollman n->m_data = m->m_data; 73215689Swollman n->m_ext = m->m_ext; 73315689Swollman n->m_flags |= M_EXT; 73464837Sdwmalone MEXT_ADD_REF(m); 73515689Swollman } else { 73615689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 73715689Swollman } 73815689Swollman 73915689Swollman m = m->m_next; 74015689Swollman } 74115689Swollman return top; 74215689Swollmannospace: 74315689Swollman m_freem(top); 74466475Sbmilekic atomic_add_long(&MCFail, 1); 74515689Swollman return 0; 74615689Swollman} 74715689Swollman 74815689Swollman/* 7491541Srgrimes * Copy data from an mbuf chain starting "off" bytes from the beginning, 7501541Srgrimes * continuing for "len" bytes, into the indicated buffer. 7511541Srgrimes */ 7521549Srgrimesvoid 7531541Srgrimesm_copydata(m, off, len, cp) 7541541Srgrimes register struct mbuf *m; 7551541Srgrimes register int off; 7561541Srgrimes register int len; 7571541Srgrimes caddr_t cp; 7581541Srgrimes{ 7591541Srgrimes register unsigned count; 7601541Srgrimes 76152201Salfred KASSERT(off >= 0, ("m_copydata, negative off %d", off)); 76252201Salfred KASSERT(len >= 0, ("m_copydata, negative len %d", len)); 7631541Srgrimes while (off > 0) { 76452201Salfred KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); 7651541Srgrimes if (off < m->m_len) 7661541Srgrimes break; 7671541Srgrimes off -= m->m_len; 7681541Srgrimes m = m->m_next; 7691541Srgrimes } 7701541Srgrimes while (len > 0) { 77152201Salfred KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); 7721541Srgrimes count = min(m->m_len - off, len); 7731541Srgrimes bcopy(mtod(m, caddr_t) + off, cp, count); 7741541Srgrimes len -= count; 7751541Srgrimes cp += count; 7761541Srgrimes off = 0; 7771541Srgrimes m = m->m_next; 7781541Srgrimes } 7791541Srgrimes} 7801541Srgrimes 7811541Srgrimes/* 78254002Sarchie * Copy a packet header mbuf chain into a completely new chain, including 78354002Sarchie * copying any mbuf clusters. Use this instead of m_copypacket() when 78454002Sarchie * you need a writable copy of an mbuf chain. 78554002Sarchie */ 78654002Sarchiestruct mbuf * 78754002Sarchiem_dup(m, how) 78854002Sarchie struct mbuf *m; 78954002Sarchie int how; 79054002Sarchie{ 79154002Sarchie struct mbuf **p, *top = NULL; 79254002Sarchie int remain, moff, nsize; 79354002Sarchie 79454002Sarchie /* Sanity check */ 79554002Sarchie if (m == NULL) 79654002Sarchie return (0); 79754002Sarchie KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__)); 79854002Sarchie 79954002Sarchie /* While there's more data, get a new mbuf, tack it on, and fill it */ 80054002Sarchie remain = m->m_pkthdr.len; 80154002Sarchie moff = 0; 80254002Sarchie p = ⊤ 80354002Sarchie while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ 80454002Sarchie struct mbuf *n; 80554002Sarchie 80654002Sarchie /* Get the next new mbuf */ 80754002Sarchie MGET(n, how, m->m_type); 80854002Sarchie if (n == NULL) 80954002Sarchie goto nospace; 81054002Sarchie if (top == NULL) { /* first one, must be PKTHDR */ 81154002Sarchie M_COPY_PKTHDR(n, m); 81254002Sarchie nsize = MHLEN; 81354002Sarchie } else /* not the first one */ 81454002Sarchie nsize = MLEN; 81554002Sarchie if (remain >= MINCLSIZE) { 81654002Sarchie MCLGET(n, how); 81754002Sarchie if ((n->m_flags & M_EXT) == 0) { 81854002Sarchie (void)m_free(n); 81954002Sarchie goto nospace; 82054002Sarchie } 82154002Sarchie nsize = MCLBYTES; 82254002Sarchie } 82354002Sarchie n->m_len = 0; 82454002Sarchie 82554002Sarchie /* Link it into the new chain */ 82654002Sarchie *p = n; 82754002Sarchie p = &n->m_next; 82854002Sarchie 82954002Sarchie /* Copy data from original mbuf(s) into new mbuf */ 83054002Sarchie while (n->m_len < nsize && m != NULL) { 83154002Sarchie int chunk = min(nsize - n->m_len, m->m_len - moff); 83254002Sarchie 83354002Sarchie bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 83454002Sarchie moff += chunk; 83554002Sarchie n->m_len += chunk; 83654002Sarchie remain -= chunk; 83754002Sarchie if (moff == m->m_len) { 83854002Sarchie m = m->m_next; 83954002Sarchie moff = 0; 84054002Sarchie } 84154002Sarchie } 84254002Sarchie 84354002Sarchie /* Check correct total mbuf length */ 84454002Sarchie KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), 84554002Sarchie ("%s: bogus m_pkthdr.len", __FUNCTION__)); 84654002Sarchie } 84754002Sarchie return (top); 84854002Sarchie 84954002Sarchienospace: 85054002Sarchie m_freem(top); 85166475Sbmilekic atomic_add_long(&MCFail, 1); 85254002Sarchie return (0); 85354002Sarchie} 85454002Sarchie 85554002Sarchie/* 8561541Srgrimes * Concatenate mbuf chain n to m. 8571541Srgrimes * Both chains must be of the same type (e.g. MT_DATA). 8581541Srgrimes * Any m_pkthdr is not updated. 8591541Srgrimes */ 8601549Srgrimesvoid 8611541Srgrimesm_cat(m, n) 8621541Srgrimes register struct mbuf *m, *n; 8631541Srgrimes{ 8641541Srgrimes while (m->m_next) 8651541Srgrimes m = m->m_next; 8661541Srgrimes while (n) { 8671541Srgrimes if (m->m_flags & M_EXT || 8681541Srgrimes m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { 8691541Srgrimes /* just join the two chains */ 8701541Srgrimes m->m_next = n; 8711541Srgrimes return; 8721541Srgrimes } 8731541Srgrimes /* splat the data from one into the other */ 8741541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 8751541Srgrimes (u_int)n->m_len); 8761541Srgrimes m->m_len += n->m_len; 8771541Srgrimes n = m_free(n); 8781541Srgrimes } 8791541Srgrimes} 8801541Srgrimes 8811549Srgrimesvoid 8821541Srgrimesm_adj(mp, req_len) 8831541Srgrimes struct mbuf *mp; 8841541Srgrimes int req_len; 8851541Srgrimes{ 8861541Srgrimes register int len = req_len; 8871541Srgrimes register struct mbuf *m; 88833678Sbde register int count; 8891541Srgrimes 8901541Srgrimes if ((m = mp) == NULL) 8911541Srgrimes return; 8921541Srgrimes if (len >= 0) { 8931541Srgrimes /* 8941541Srgrimes * Trim from head. 8951541Srgrimes */ 8961541Srgrimes while (m != NULL && len > 0) { 8971541Srgrimes if (m->m_len <= len) { 8981541Srgrimes len -= m->m_len; 8991541Srgrimes m->m_len = 0; 9001541Srgrimes m = m->m_next; 9011541Srgrimes } else { 9021541Srgrimes m->m_len -= len; 9031541Srgrimes m->m_data += len; 9041541Srgrimes len = 0; 9051541Srgrimes } 9061541Srgrimes } 9071541Srgrimes m = mp; 9081541Srgrimes if (mp->m_flags & M_PKTHDR) 9091541Srgrimes m->m_pkthdr.len -= (req_len - len); 9101541Srgrimes } else { 9111541Srgrimes /* 9121541Srgrimes * Trim from tail. Scan the mbuf chain, 9131541Srgrimes * calculating its length and finding the last mbuf. 9141541Srgrimes * If the adjustment only affects this mbuf, then just 9151541Srgrimes * adjust and return. Otherwise, rescan and truncate 9161541Srgrimes * after the remaining size. 9171541Srgrimes */ 9181541Srgrimes len = -len; 9191541Srgrimes count = 0; 9201541Srgrimes for (;;) { 9211541Srgrimes count += m->m_len; 9221541Srgrimes if (m->m_next == (struct mbuf *)0) 9231541Srgrimes break; 9241541Srgrimes m = m->m_next; 9251541Srgrimes } 9261541Srgrimes if (m->m_len >= len) { 9271541Srgrimes m->m_len -= len; 9281541Srgrimes if (mp->m_flags & M_PKTHDR) 9291541Srgrimes mp->m_pkthdr.len -= len; 9301541Srgrimes return; 9311541Srgrimes } 9321541Srgrimes count -= len; 9331541Srgrimes if (count < 0) 9341541Srgrimes count = 0; 9351541Srgrimes /* 9361541Srgrimes * Correct length for chain is "count". 9371541Srgrimes * Find the mbuf with last data, adjust its length, 9381541Srgrimes * and toss data from remaining mbufs on chain. 9391541Srgrimes */ 9401541Srgrimes m = mp; 9411541Srgrimes if (m->m_flags & M_PKTHDR) 9421541Srgrimes m->m_pkthdr.len = count; 9431541Srgrimes for (; m; m = m->m_next) { 9441541Srgrimes if (m->m_len >= count) { 9451541Srgrimes m->m_len = count; 9461541Srgrimes break; 9471541Srgrimes } 9481541Srgrimes count -= m->m_len; 9491541Srgrimes } 9503308Sphk while (m->m_next) 9513308Sphk (m = m->m_next) ->m_len = 0; 9521541Srgrimes } 9531541Srgrimes} 9541541Srgrimes 9551541Srgrimes/* 9561541Srgrimes * Rearange an mbuf chain so that len bytes are contiguous 9571541Srgrimes * and in the data area of an mbuf (so that mtod and dtom 9581541Srgrimes * will work for a structure of size len). Returns the resulting 9591541Srgrimes * mbuf chain on success, frees it and returns null on failure. 9601541Srgrimes * If there is room, it will add up to max_protohdr-len extra bytes to the 9611541Srgrimes * contiguous region in an attempt to avoid being called next time. 9621541Srgrimes */ 96323081Swollman#define MPFail (mbstat.m_mpfail) 9641541Srgrimes 9651541Srgrimesstruct mbuf * 9661541Srgrimesm_pullup(n, len) 9671541Srgrimes register struct mbuf *n; 9681541Srgrimes int len; 9691541Srgrimes{ 9701541Srgrimes register struct mbuf *m; 9711541Srgrimes register int count; 9721541Srgrimes int space; 9731541Srgrimes 9741541Srgrimes /* 9751541Srgrimes * If first mbuf has no cluster, and has room for len bytes 9761541Srgrimes * without shifting current data, pullup into it, 9771541Srgrimes * otherwise allocate a new mbuf to prepend to the chain. 9781541Srgrimes */ 9791541Srgrimes if ((n->m_flags & M_EXT) == 0 && 9801541Srgrimes n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 9811541Srgrimes if (n->m_len >= len) 9821541Srgrimes return (n); 9831541Srgrimes m = n; 9841541Srgrimes n = n->m_next; 9851541Srgrimes len -= m->m_len; 9861541Srgrimes } else { 9871541Srgrimes if (len > MHLEN) 9881541Srgrimes goto bad; 9891541Srgrimes MGET(m, M_DONTWAIT, n->m_type); 9901541Srgrimes if (m == 0) 9911541Srgrimes goto bad; 9921541Srgrimes m->m_len = 0; 9931541Srgrimes if (n->m_flags & M_PKTHDR) { 9941541Srgrimes M_COPY_PKTHDR(m, n); 9951541Srgrimes n->m_flags &= ~M_PKTHDR; 9961541Srgrimes } 9971541Srgrimes } 9981541Srgrimes space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 9991541Srgrimes do { 10001541Srgrimes count = min(min(max(len, max_protohdr), space), n->m_len); 10011541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 10021541Srgrimes (unsigned)count); 10031541Srgrimes len -= count; 10041541Srgrimes m->m_len += count; 10051541Srgrimes n->m_len -= count; 10061541Srgrimes space -= count; 10071541Srgrimes if (n->m_len) 10081541Srgrimes n->m_data += count; 10091541Srgrimes else 10101541Srgrimes n = m_free(n); 10111541Srgrimes } while (len > 0 && n); 10121541Srgrimes if (len > 0) { 10131541Srgrimes (void) m_free(m); 10141541Srgrimes goto bad; 10151541Srgrimes } 10161541Srgrimes m->m_next = n; 10171541Srgrimes return (m); 10181541Srgrimesbad: 10191541Srgrimes m_freem(n); 102066475Sbmilekic atomic_add_long(&MPFail, 1); 10211541Srgrimes return (0); 10221541Srgrimes} 10231541Srgrimes 10241541Srgrimes/* 10251541Srgrimes * Partition an mbuf chain in two pieces, returning the tail -- 10261541Srgrimes * all but the first len0 bytes. In case of failure, it returns NULL and 10271541Srgrimes * attempts to restore the chain to its original state. 10281541Srgrimes */ 10291541Srgrimesstruct mbuf * 10301541Srgrimesm_split(m0, len0, wait) 10311541Srgrimes register struct mbuf *m0; 10321541Srgrimes int len0, wait; 10331541Srgrimes{ 10341541Srgrimes register struct mbuf *m, *n; 10351541Srgrimes unsigned len = len0, remain; 10361541Srgrimes 10371541Srgrimes for (m = m0; m && len > m->m_len; m = m->m_next) 10381541Srgrimes len -= m->m_len; 10391541Srgrimes if (m == 0) 10401541Srgrimes return (0); 10411541Srgrimes remain = m->m_len - len; 10421541Srgrimes if (m0->m_flags & M_PKTHDR) { 10431541Srgrimes MGETHDR(n, wait, m0->m_type); 10441541Srgrimes if (n == 0) 10451541Srgrimes return (0); 10461541Srgrimes n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 10471541Srgrimes n->m_pkthdr.len = m0->m_pkthdr.len - len0; 10481541Srgrimes m0->m_pkthdr.len = len0; 10491541Srgrimes if (m->m_flags & M_EXT) 10501541Srgrimes goto extpacket; 10511541Srgrimes if (remain > MHLEN) { 10521541Srgrimes /* m can't be the lead packet */ 10531541Srgrimes MH_ALIGN(n, 0); 10541541Srgrimes n->m_next = m_split(m, len, wait); 10551541Srgrimes if (n->m_next == 0) { 10561541Srgrimes (void) m_free(n); 10571541Srgrimes return (0); 10581541Srgrimes } else 10591541Srgrimes return (n); 10601541Srgrimes } else 10611541Srgrimes MH_ALIGN(n, remain); 10621541Srgrimes } else if (remain == 0) { 10631541Srgrimes n = m->m_next; 10641541Srgrimes m->m_next = 0; 10651541Srgrimes return (n); 10661541Srgrimes } else { 10671541Srgrimes MGET(n, wait, m->m_type); 10681541Srgrimes if (n == 0) 10691541Srgrimes return (0); 10701541Srgrimes M_ALIGN(n, remain); 10711541Srgrimes } 10721541Srgrimesextpacket: 10731541Srgrimes if (m->m_flags & M_EXT) { 10741541Srgrimes n->m_flags |= M_EXT; 10751541Srgrimes n->m_ext = m->m_ext; 107664837Sdwmalone MEXT_ADD_REF(m); 10771541Srgrimes m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ 10781541Srgrimes n->m_data = m->m_data + len; 10791541Srgrimes } else { 10801541Srgrimes bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 10811541Srgrimes } 10821541Srgrimes n->m_len = remain; 10831541Srgrimes m->m_len = len; 10841541Srgrimes n->m_next = m->m_next; 10851541Srgrimes m->m_next = 0; 10861541Srgrimes return (n); 10871541Srgrimes} 10881541Srgrimes/* 10891541Srgrimes * Routine to copy from device local memory into mbufs. 10901541Srgrimes */ 10911541Srgrimesstruct mbuf * 10921541Srgrimesm_devget(buf, totlen, off0, ifp, copy) 10931541Srgrimes char *buf; 10941541Srgrimes int totlen, off0; 10951541Srgrimes struct ifnet *ifp; 109612577Sbde void (*copy) __P((char *from, caddr_t to, u_int len)); 10971541Srgrimes{ 10981541Srgrimes register struct mbuf *m; 10991541Srgrimes struct mbuf *top = 0, **mp = ⊤ 11001541Srgrimes register int off = off0, len; 11011541Srgrimes register char *cp; 11021541Srgrimes char *epkt; 11031541Srgrimes 11041541Srgrimes cp = buf; 11051541Srgrimes epkt = cp + totlen; 11061541Srgrimes if (off) { 11071541Srgrimes cp += off + 2 * sizeof(u_short); 11081541Srgrimes totlen -= 2 * sizeof(u_short); 11091541Srgrimes } 11101541Srgrimes MGETHDR(m, M_DONTWAIT, MT_DATA); 11111541Srgrimes if (m == 0) 11121541Srgrimes return (0); 11131541Srgrimes m->m_pkthdr.rcvif = ifp; 11141541Srgrimes m->m_pkthdr.len = totlen; 11151541Srgrimes m->m_len = MHLEN; 11161541Srgrimes 11171541Srgrimes while (totlen > 0) { 11181541Srgrimes if (top) { 11191541Srgrimes MGET(m, M_DONTWAIT, MT_DATA); 11201541Srgrimes if (m == 0) { 11211541Srgrimes m_freem(top); 11221541Srgrimes return (0); 11231541Srgrimes } 11241541Srgrimes m->m_len = MLEN; 11251541Srgrimes } 11261541Srgrimes len = min(totlen, epkt - cp); 11271541Srgrimes if (len >= MINCLSIZE) { 11281541Srgrimes MCLGET(m, M_DONTWAIT); 11291541Srgrimes if (m->m_flags & M_EXT) 11301541Srgrimes m->m_len = len = min(len, MCLBYTES); 11311541Srgrimes else 11321541Srgrimes len = m->m_len; 11331541Srgrimes } else { 11341541Srgrimes /* 11351541Srgrimes * Place initial small packet/header at end of mbuf. 11361541Srgrimes */ 11371541Srgrimes if (len < m->m_len) { 11381541Srgrimes if (top == 0 && len + max_linkhdr <= m->m_len) 11391541Srgrimes m->m_data += max_linkhdr; 11401541Srgrimes m->m_len = len; 11411541Srgrimes } else 11421541Srgrimes len = m->m_len; 11431541Srgrimes } 11441541Srgrimes if (copy) 11451541Srgrimes copy(cp, mtod(m, caddr_t), (unsigned)len); 11461541Srgrimes else 11471541Srgrimes bcopy(cp, mtod(m, caddr_t), (unsigned)len); 11481541Srgrimes cp += len; 11491541Srgrimes *mp = m; 11501541Srgrimes mp = &m->m_next; 11511541Srgrimes totlen -= len; 11521541Srgrimes if (cp == epkt) 11531541Srgrimes cp = buf; 11541541Srgrimes } 11551541Srgrimes return (top); 11561541Srgrimes} 11573352Sphk 11583352Sphk/* 11593352Sphk * Copy data from a buffer back into the indicated mbuf chain, 11603352Sphk * starting "off" bytes from the beginning, extending the mbuf 11613352Sphk * chain if necessary. 11623352Sphk */ 11633352Sphkvoid 11643352Sphkm_copyback(m0, off, len, cp) 11653352Sphk struct mbuf *m0; 11663352Sphk register int off; 11673352Sphk register int len; 11683352Sphk caddr_t cp; 11693352Sphk{ 11703352Sphk register int mlen; 11713352Sphk register struct mbuf *m = m0, *n; 11723352Sphk int totlen = 0; 11733352Sphk 11743352Sphk if (m0 == 0) 11753352Sphk return; 11763352Sphk while (off > (mlen = m->m_len)) { 11773352Sphk off -= mlen; 11783352Sphk totlen += mlen; 11793352Sphk if (m->m_next == 0) { 11803352Sphk n = m_getclr(M_DONTWAIT, m->m_type); 11813352Sphk if (n == 0) 11823352Sphk goto out; 11833352Sphk n->m_len = min(MLEN, len + off); 11843352Sphk m->m_next = n; 11853352Sphk } 11863352Sphk m = m->m_next; 11873352Sphk } 11883352Sphk while (len > 0) { 11893352Sphk mlen = min (m->m_len - off, len); 11903352Sphk bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); 11913352Sphk cp += mlen; 11923352Sphk len -= mlen; 11933352Sphk mlen += off; 11943352Sphk off = 0; 11953352Sphk totlen += mlen; 11963352Sphk if (len == 0) 11973352Sphk break; 11983352Sphk if (m->m_next == 0) { 11993352Sphk n = m_get(M_DONTWAIT, m->m_type); 12003352Sphk if (n == 0) 12013352Sphk break; 12023352Sphk n->m_len = min(MLEN, len); 12033352Sphk m->m_next = n; 12043352Sphk } 12053352Sphk m = m->m_next; 12063352Sphk } 12073352Sphkout: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 12083352Sphk m->m_pkthdr.len = totlen; 12093352Sphk} 121052756Sphk 121152756Sphkvoid 121252756Sphkm_print(const struct mbuf *m) 121352756Sphk{ 121452756Sphk int len; 121554906Seivind const struct mbuf *m2; 121652756Sphk 121752756Sphk len = m->m_pkthdr.len; 121852756Sphk m2 = m; 121952756Sphk while (len) { 122052756Sphk printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); 122152756Sphk len -= m2->m_len; 122252756Sphk m2 = m2->m_next; 122352756Sphk } 122452756Sphk return; 122552756Sphk} 1226