uipc_mbuf.c revision 75105
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 331541Srgrimes * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 3450477Speter * $FreeBSD: head/sys/kern/uipc_mbuf.c 75105 2001-04-03 03:15:11Z alfred $ 351541Srgrimes */ 361541Srgrimes 3748579Smsmith#include "opt_param.h" 381541Srgrimes#include <sys/param.h> 391541Srgrimes#include <sys/systm.h> 4032036Sbde#include <sys/malloc.h> 411541Srgrimes#include <sys/mbuf.h> 4267365Sjhb#include <sys/mutex.h> 431541Srgrimes#include <sys/kernel.h> 4423081Swollman#include <sys/sysctl.h> 451541Srgrimes#include <sys/domain.h> 461541Srgrimes#include <sys/protosw.h> 471541Srgrimes#include <vm/vm.h> 489759Sbde#include <vm/vm_kern.h> 4912662Sdg#include <vm/vm_extern.h> 501541Srgrimes 5172356Sbmilekicstatic void mbinit(void *); 5210358SjulianSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) 5310358Sjulian 549759Sbdestruct mbuf *mbutl; 559759Sbdestruct mbstat mbstat; 5663203Salfredu_long mbtypes[MT_NTYPES]; 579759Sbdeint max_linkhdr; 589759Sbdeint max_protohdr; 599759Sbdeint max_hdr; 609759Sbdeint max_datalen; 6148579Smsmithint nmbclusters; 6248579Smsmithint nmbufs; 6367144Sbmilekicint nmbcnt; 6466475Sbmilekicu_long m_mballoc_wid = 0; 6566475Sbmilekicu_long m_clalloc_wid = 0; 661541Srgrimes 6766475Sbmilekic/* 6866475Sbmilekic * freelist header structures... 6966475Sbmilekic * mbffree_lst, mclfree_lst, mcntfree_lst 7066475Sbmilekic */ 7166475Sbmilekicstruct mbffree_lst mmbfree; 7266475Sbmilekicstruct mclfree_lst mclfree; 7366475Sbmilekicstruct mcntfree_lst mcntfree; 7475105Salfredstruct mtx mbuf_mtx; 7566475Sbmilekic 7666475Sbmilekic/* 7766475Sbmilekic * sysctl(8) exported objects 7866475Sbmilekic */ 7944078SdfrSYSCTL_DECL(_kern_ipc); 8023081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, 8123081Swollman &max_linkhdr, 0, ""); 8223081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, 8323081Swollman &max_protohdr, 0, ""); 8423081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); 8523081SwollmanSYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, 8623081Swollman &max_datalen, 0, ""); 8754478SgreenSYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, 8854478Sgreen &mbuf_wait, 0, ""); 8964048SalfredSYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, ""); 9063203SalfredSYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes, 9163203Salfred sizeof(mbtypes), "LU", ""); 9248579SmsmithSYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, 9355171Smsmith &nmbclusters, 0, "Maximum number of mbuf clusters available"); 9455171SmsmithSYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, 9555171Smsmith "Maximum number of mbufs available"); 9667144SbmilekicSYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0, 9767144Sbmilekic "Maximum number of ext_buf counters available"); 9848579Smsmith#ifndef NMBCLUSTERS 9948579Smsmith#define NMBCLUSTERS (512 + MAXUSERS * 16) 10048579Smsmith#endif 10148579SmsmithTUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters); 10255171SmsmithTUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs); 10367144SbmilekicTUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt); 10423081Swollman 10572356Sbmilekicstatic void m_reclaim(void); 10612819Sphk 10766475Sbmilekic/* Initial allocation numbers */ 10864837Sdwmalone#define NCL_INIT 2 10915744Sphk#define NMB_INIT 16 11066475Sbmilekic#define REF_INIT NMBCLUSTERS 11115744Sphk 11266475Sbmilekic/* 11366475Sbmilekic * Full mbuf subsystem initialization done here. 11466475Sbmilekic * 11566475Sbmilekic * XXX: If ever we have system specific map setups to do, then move them to 11666475Sbmilekic * machdep.c - for now, there is no reason for this stuff to go there. 11766475Sbmilekic */ 11810358Sjulianstatic void 11972356Sbmilekicmbinit(void *dummy) 1201541Srgrimes{ 12166475Sbmilekic vm_offset_t maxaddr, mb_map_size; 1221541Srgrimes 12366475Sbmilekic /* 12466475Sbmilekic * Setup the mb_map, allocate requested VM space. 12566475Sbmilekic */ 12667144Sbmilekic mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt 12766475Sbmilekic * sizeof(union mext_refcnt); 12866475Sbmilekic mb_map_size = roundup2(mb_map_size, PAGE_SIZE); 12966475Sbmilekic mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, 13066475Sbmilekic mb_map_size); 13172356Sbmilekic /* XXX XXX XXX: mb_map->system_map = 1; */ 13264837Sdwmalone 13366475Sbmilekic /* 13466475Sbmilekic * Initialize the free list headers, and setup locks for lists. 13566475Sbmilekic */ 13666475Sbmilekic mmbfree.m_head = NULL; 13766475Sbmilekic mclfree.m_head = NULL; 13866475Sbmilekic mcntfree.m_head = NULL; 13975105Salfred mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF); 14066475Sbmilekic 14166475Sbmilekic /* 14266475Sbmilekic * Initialize mbuf subsystem (sysctl exported) statistics structure. 14366475Sbmilekic */ 14423081Swollman mbstat.m_msize = MSIZE; 14523081Swollman mbstat.m_mclbytes = MCLBYTES; 14623081Swollman mbstat.m_minclsize = MINCLSIZE; 14723081Swollman mbstat.m_mlen = MLEN; 14823081Swollman mbstat.m_mhlen = MHLEN; 14923081Swollman 15066475Sbmilekic /* 15166475Sbmilekic * Perform some initial allocations. 15266475Sbmilekic */ 15375105Salfred mtx_lock(&mbuf_mtx); 15466475Sbmilekic if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0) 15564837Sdwmalone goto bad; 15615689Swollman if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) 15715689Swollman goto bad; 1581541Srgrimes if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) 1591541Srgrimes goto bad; 16075105Salfred mtx_unlock(&mbuf_mtx); 16166475Sbmilekic 1621541Srgrimes return; 1631541Srgrimesbad: 16464837Sdwmalone panic("mbinit: failed to initialize mbuf subsystem!"); 1651541Srgrimes} 1661541Srgrimes 1671541Srgrimes/* 16864837Sdwmalone * Allocate at least nmb reference count structs and place them 16964837Sdwmalone * on the ref cnt free list. 17066475Sbmilekic * 17166475Sbmilekic * Must be called with the mcntfree lock held. 17264837Sdwmalone */ 17364837Sdwmaloneint 17472356Sbmilekicm_alloc_ref(u_int nmb, int how) 17564837Sdwmalone{ 17664837Sdwmalone caddr_t p; 17764837Sdwmalone u_int nbytes; 17864837Sdwmalone int i; 17964837Sdwmalone 18064837Sdwmalone /* 18164837Sdwmalone * We don't cap the amount of memory that can be used 18264837Sdwmalone * by the reference counters, like we do for mbufs and 18366475Sbmilekic * mbuf clusters. In fact, we're absolutely sure that we 18466475Sbmilekic * won't ever be going over our allocated space. We keep enough 18566475Sbmilekic * space in mb_map to accomodate maximum values of allocatable 18666475Sbmilekic * external buffers including, but not limited to, clusters. 18766475Sbmilekic * (That's also why we won't have to have wait routines for 18866475Sbmilekic * counters). 18966475Sbmilekic * 19066475Sbmilekic * If we're in here, we're absolutely certain to be returning 19166475Sbmilekic * succesfully, as long as there is physical memory to accomodate 19266475Sbmilekic * us. And if there isn't, but we're willing to wait, then 19366475Sbmilekic * kmem_malloc() will do the only waiting needed. 19464837Sdwmalone */ 19564837Sdwmalone 19664837Sdwmalone nbytes = round_page(nmb * sizeof(union mext_refcnt)); 19775105Salfred if (1 /* XXX: how == M_TRYWAIT */) 19875105Salfred mtx_unlock(&mbuf_mtx); 19970254Sbmilekic if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ? 20070254Sbmilekic M_WAITOK : M_NOWAIT)) == NULL) { 20175105Salfred if (1 /* XXX: how == M_TRYWAIT */) 20275105Salfred mtx_lock(&mbuf_mtx); 20364837Sdwmalone return (0); 20466475Sbmilekic } 20564837Sdwmalone nmb = nbytes / sizeof(union mext_refcnt); 20664837Sdwmalone 20766475Sbmilekic /* 20866475Sbmilekic * We don't let go of the mutex in order to avoid a race. 20966475Sbmilekic * It is up to the caller to let go of the mutex. 21066475Sbmilekic */ 21175105Salfred if (1 /* XXX: how == M_TRYWAIT */) 21275105Salfred mtx_lock(&mbuf_mtx); 21364837Sdwmalone for (i = 0; i < nmb; i++) { 21466475Sbmilekic ((union mext_refcnt *)p)->next_ref = mcntfree.m_head; 21566475Sbmilekic mcntfree.m_head = (union mext_refcnt *)p; 21664837Sdwmalone p += sizeof(union mext_refcnt); 21764837Sdwmalone mbstat.m_refree++; 21864837Sdwmalone } 21964837Sdwmalone mbstat.m_refcnt += nmb; 22064837Sdwmalone 22164837Sdwmalone return (1); 22264837Sdwmalone} 22364837Sdwmalone 22464837Sdwmalone/* 22515689Swollman * Allocate at least nmb mbufs and place on mbuf free list. 22666475Sbmilekic * 22766475Sbmilekic * Must be called with the mmbfree lock held. 22815689Swollman */ 22915689Swollmanint 23072356Sbmilekicm_mballoc(int nmb, int how) 23115689Swollman{ 23272356Sbmilekic caddr_t p; 23372356Sbmilekic int i; 23415689Swollman int nbytes; 23515689Swollman 23674402Sbmilekic nbytes = round_page(nmb * MSIZE); 23774402Sbmilekic nmb = nbytes / MSIZE; 23874402Sbmilekic 23954478Sgreen /* 24066475Sbmilekic * If we've hit the mbuf limit, stop allocating from mb_map. 24166475Sbmilekic * Also, once we run out of map space, it will be impossible to 24266475Sbmilekic * get any more (nothing is ever freed back to the map). 24355171Smsmith */ 24474764Sbmilekic if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) 24555171Smsmith return (0); 24655171Smsmith 24775105Salfred if (1 /* XXX: how == M_TRYWAIT */) 24875105Salfred mtx_unlock(&mbuf_mtx); 24975105Salfred p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ? 25075105Salfred M_WAITOK : M_NOWAIT); 25175105Salfred if (1 /* XXX: how == M_TRYWAIT */) { 25275105Salfred mtx_lock(&mbuf_mtx); 25375105Salfred if (p == NULL) 25475105Salfred mbstat.m_wait++; 25522899Swollman } 25622899Swollman 25715689Swollman /* 25866475Sbmilekic * Either the map is now full, or `how' is M_DONTWAIT and there 25915689Swollman * are no pages left. 26015689Swollman */ 26115689Swollman if (p == NULL) 26215689Swollman return (0); 26315689Swollman 26466475Sbmilekic /* 26566475Sbmilekic * We don't let go of the mutex in order to avoid a race. 26666475Sbmilekic * It is up to the caller to let go of the mutex when done 26766475Sbmilekic * with grabbing the mbuf from the free list. 26866475Sbmilekic */ 26915689Swollman for (i = 0; i < nmb; i++) { 27066475Sbmilekic ((struct mbuf *)p)->m_next = mmbfree.m_head; 27166475Sbmilekic mmbfree.m_head = (struct mbuf *)p; 27215689Swollman p += MSIZE; 27315689Swollman } 27415689Swollman mbstat.m_mbufs += nmb; 27563203Salfred mbtypes[MT_FREE] += nmb; 27615689Swollman return (1); 27715689Swollman} 27815689Swollman 27954478Sgreen/* 28054478Sgreen * Once the mb_map has been exhausted and if the call to the allocation macros 28170254Sbmilekic * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to 28270254Sbmilekic * rely solely on reclaimed mbufs. 28366475Sbmilekic * 28466475Sbmilekic * Here we request for the protocols to free up some resources and, if we 28566475Sbmilekic * still cannot get anything, then we wait for an mbuf to be freed for a 28654478Sgreen * designated (mbuf_wait) time. 28766475Sbmilekic * 28870858Sbmilekic * Must be called with the mmbfree mutex held. 28954478Sgreen */ 29054478Sgreenstruct mbuf * 29166475Sbmilekicm_mballoc_wait(void) 29254478Sgreen{ 29366475Sbmilekic struct mbuf *p = NULL; 29454478Sgreen 29554478Sgreen /* 29666475Sbmilekic * See if we can drain some resources out of the protocols. 29770858Sbmilekic * We drop the mmbfree mutex to avoid recursing into it in some of 29870858Sbmilekic * the drain routines. Clearly, we're faced with a race here because 29970858Sbmilekic * once something is freed during the drain, it may be grabbed right 30070858Sbmilekic * from under us by some other thread. But we accept this possibility 30170858Sbmilekic * in order to avoid a potentially large lock recursion and, more 30270858Sbmilekic * importantly, to avoid a potential lock order reversal which may 30370858Sbmilekic * result in deadlock (See comment above m_reclaim()). 30454478Sgreen */ 30575105Salfred mtx_unlock(&mbuf_mtx); 30666475Sbmilekic m_reclaim(); 30770858Sbmilekic 30875105Salfred mtx_lock(&mbuf_mtx); 30966475Sbmilekic _MGET(p, M_DONTWAIT); 31054478Sgreen 31166475Sbmilekic if (p == NULL) { 31266475Sbmilekic m_mballoc_wid++; 31375105Salfred msleep(&m_mballoc_wid, &mbuf_mtx, PVM, "mballc", 31471302Sbmilekic mbuf_wait); 31571302Sbmilekic m_mballoc_wid--; 31666475Sbmilekic 31766475Sbmilekic /* 31866475Sbmilekic * Try again (one last time). 31966475Sbmilekic * 32066475Sbmilekic * We retry to fetch _even_ if the sleep timed out. This 32166475Sbmilekic * is left this way, purposely, in the [unlikely] case 32266475Sbmilekic * that an mbuf was freed but the sleep was not awoken 32366475Sbmilekic * in time. 32466475Sbmilekic * 32566475Sbmilekic * If the sleep didn't time out (i.e. we got woken up) then 32666475Sbmilekic * we have the lock so we just grab an mbuf, hopefully. 32766475Sbmilekic */ 32866475Sbmilekic _MGET(p, M_DONTWAIT); 32954478Sgreen } 33054478Sgreen 33166475Sbmilekic /* If we waited and got something... */ 33266475Sbmilekic if (p != NULL) { 33375105Salfred mbstat.m_wait++; 33466475Sbmilekic if (mmbfree.m_head != NULL) 33566475Sbmilekic MBWAKEUP(m_mballoc_wid); 33674764Sbmilekic } 33722671Swollman 33866475Sbmilekic return (p); 33922671Swollman} 34022671Swollman 34115689Swollman/* 3421541Srgrimes * Allocate some number of mbuf clusters 3431541Srgrimes * and place on cluster free list. 34466475Sbmilekic * 34566475Sbmilekic * Must be called with the mclfree lock held. 3461541Srgrimes */ 3471549Srgrimesint 34872356Sbmilekicm_clalloc(int ncl, int how) 3491541Srgrimes{ 35072356Sbmilekic caddr_t p; 35172356Sbmilekic int i; 35274402Sbmilekic int npg_sz; 3531541Srgrimes 35474402Sbmilekic npg_sz = round_page(ncl * MCLBYTES); 35574402Sbmilekic ncl = npg_sz / MCLBYTES; 35674402Sbmilekic 3577066Sdg /* 35866475Sbmilekic * If the map is now full (nothing will ever be freed to it). 35955171Smsmith * If we've hit the mcluster number limit, stop allocating from 36066475Sbmilekic * mb_map. 36155171Smsmith */ 36274764Sbmilekic if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) 36355171Smsmith return (0); 36455171Smsmith 36575105Salfred if (1 /* XXX: how == M_TRYWAIT */) 36675105Salfred mtx_unlock(&mbuf_mtx); 36774402Sbmilekic p = (caddr_t)kmem_malloc(mb_map, npg_sz, 36870254Sbmilekic how == M_TRYWAIT ? M_WAITOK : M_NOWAIT); 36975105Salfred if (1 /* XXX: how == M_TRYWAIT */) 37075105Salfred mtx_lock(&mbuf_mtx); 37166475Sbmilekic 3727066Sdg /* 37366475Sbmilekic * Either the map is now full, or `how' is M_DONTWAIT and there 3747066Sdg * are no pages left. 3757066Sdg */ 37674764Sbmilekic if (p == NULL) 3771541Srgrimes return (0); 3787066Sdg 3791541Srgrimes for (i = 0; i < ncl; i++) { 38066475Sbmilekic ((union mcluster *)p)->mcl_next = mclfree.m_head; 38166475Sbmilekic mclfree.m_head = (union mcluster *)p; 3821541Srgrimes p += MCLBYTES; 3831541Srgrimes mbstat.m_clfree++; 3841541Srgrimes } 3851541Srgrimes mbstat.m_clusters += ncl; 3861541Srgrimes return (1); 3871541Srgrimes} 3881541Srgrimes 3891541Srgrimes/* 39054478Sgreen * Once the mb_map submap has been exhausted and the allocation is called with 39170254Sbmilekic * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will 39254478Sgreen * sleep for a designated amount of time (mbuf_wait) or until we're woken up 39354478Sgreen * due to sudden mcluster availability. 39466475Sbmilekic * 39566475Sbmilekic * Must be called with the mclfree lock held. 39654478Sgreen */ 39754478Sgreencaddr_t 39854478Sgreenm_clalloc_wait(void) 39954478Sgreen{ 40066475Sbmilekic caddr_t p = NULL; 40154478Sgreen 40254478Sgreen m_clalloc_wid++; 40375105Salfred msleep(&m_clalloc_wid, &mbuf_mtx, PVM, "mclalc", mbuf_wait); 40471302Sbmilekic m_clalloc_wid--; 40554478Sgreen 40654478Sgreen /* 40766475Sbmilekic * Now that we (think) that we've got something, try again. 40854478Sgreen */ 40964837Sdwmalone _MCLALLOC(p, M_DONTWAIT); 41054478Sgreen 41166475Sbmilekic /* If we waited and got something ... */ 41266475Sbmilekic if (p != NULL) { 41375105Salfred mbstat.m_wait++; 41466475Sbmilekic if (mclfree.m_head != NULL) 41566475Sbmilekic MBWAKEUP(m_clalloc_wid); 41674764Sbmilekic } 41754478Sgreen 41854478Sgreen return (p); 41954478Sgreen} 42054478Sgreen 42154478Sgreen/* 42266475Sbmilekic * m_reclaim: drain protocols in hopes to free up some resources... 42366475Sbmilekic * 42470858Sbmilekic * XXX: No locks should be held going in here. The drain routines have 42570858Sbmilekic * to presently acquire some locks which raises the possibility of lock 42670858Sbmilekic * order violation if we're holding any mutex if that mutex is acquired in 42770858Sbmilekic * reverse order relative to one of the locks in the drain routines. 4281541Srgrimes */ 42912819Sphkstatic void 43072356Sbmilekicm_reclaim(void) 4311541Srgrimes{ 43272356Sbmilekic struct domain *dp; 43372356Sbmilekic struct protosw *pr; 4341541Srgrimes 43571089Sbmilekic#ifdef WITNESS 43671089Sbmilekic KASSERT(witness_list(CURPROC) == 0, 43771089Sbmilekic ("m_reclaim called with locks held")); 43871089Sbmilekic#endif 43971089Sbmilekic 4401541Srgrimes for (dp = domains; dp; dp = dp->dom_next) 4411541Srgrimes for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 4421541Srgrimes if (pr->pr_drain) 4431541Srgrimes (*pr->pr_drain)(); 4441541Srgrimes mbstat.m_drain++; 4451541Srgrimes} 4461541Srgrimes 4471541Srgrimes/* 4481541Srgrimes * Space allocation routines. 44972473Sbmilekic * Some of these are also available as macros 4501541Srgrimes * for critical paths. 4511541Srgrimes */ 4521541Srgrimesstruct mbuf * 45372356Sbmilekicm_get(int how, int type) 4541541Srgrimes{ 45572356Sbmilekic struct mbuf *m; 4561541Srgrimes 45732036Sbde MGET(m, how, type); 4581541Srgrimes return (m); 4591541Srgrimes} 4601541Srgrimes 4611541Srgrimesstruct mbuf * 46272356Sbmilekicm_gethdr(int how, int type) 4631541Srgrimes{ 46472356Sbmilekic struct mbuf *m; 4651541Srgrimes 46632036Sbde MGETHDR(m, how, type); 4671541Srgrimes return (m); 4681541Srgrimes} 4691541Srgrimes 4701541Srgrimesstruct mbuf * 47172356Sbmilekicm_getclr(int how, int type) 4721541Srgrimes{ 47372356Sbmilekic struct mbuf *m; 4741541Srgrimes 47532036Sbde MGET(m, how, type); 47675105Salfred if (m != NULL) 47775105Salfred bzero(mtod(m, caddr_t), MLEN); 4781541Srgrimes return (m); 4791541Srgrimes} 4801541Srgrimes 4811541Srgrimesstruct mbuf * 48272356Sbmilekicm_free(struct mbuf *m) 4831541Srgrimes{ 48472356Sbmilekic struct mbuf *n; 4851541Srgrimes 4861541Srgrimes MFREE(m, n); 4871541Srgrimes return (n); 4881541Srgrimes} 4891541Srgrimes 49072473Sbmilekic/* 49172473Sbmilekic * struct mbuf * 49272473Sbmilekic * m_getm(m, len, how, type) 49372473Sbmilekic * 49472473Sbmilekic * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits 49572473Sbmilekic * best) and return a pointer to the top of the allocated chain. If m is 49672473Sbmilekic * non-null, then we assume that it is a single mbuf or an mbuf chain to 49772473Sbmilekic * which we want len bytes worth of mbufs and/or clusters attached, and so 49872473Sbmilekic * if we succeed in allocating it, we will just return a pointer to m. 49972473Sbmilekic * 50072473Sbmilekic * If we happen to fail at any point during the allocation, we will free 50172473Sbmilekic * up everything we have already allocated and return NULL. 50272473Sbmilekic * 50372473Sbmilekic */ 50472473Sbmilekicstruct mbuf * 50572473Sbmilekicm_getm(struct mbuf *m, int len, int how, int type) 50672473Sbmilekic{ 50772473Sbmilekic struct mbuf *top, *tail, *mp, *mtail = NULL; 50872473Sbmilekic 50972473Sbmilekic KASSERT(len >= 0, ("len is < 0 in m_getm")); 51072473Sbmilekic 51172789Sbp MGET(mp, how, type); 51272473Sbmilekic if (mp == NULL) 51372473Sbmilekic return (NULL); 51472473Sbmilekic else if (len > MINCLSIZE) { 51572473Sbmilekic MCLGET(mp, how); 51672473Sbmilekic if ((mp->m_flags & M_EXT) == 0) { 51772473Sbmilekic m_free(mp); 51872473Sbmilekic return (NULL); 51972473Sbmilekic } 52072473Sbmilekic } 52172473Sbmilekic mp->m_len = 0; 52272473Sbmilekic len -= M_TRAILINGSPACE(mp); 52372473Sbmilekic 52472473Sbmilekic if (m != NULL) 52572473Sbmilekic for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); 52672473Sbmilekic else 52772473Sbmilekic m = mp; 52872473Sbmilekic 52972473Sbmilekic top = tail = mp; 53072473Sbmilekic while (len > 0) { 53172789Sbp MGET(mp, how, type); 53272473Sbmilekic if (mp == NULL) 53372473Sbmilekic goto failed; 53472473Sbmilekic 53572473Sbmilekic tail->m_next = mp; 53672473Sbmilekic tail = mp; 53772473Sbmilekic if (len > MINCLSIZE) { 53872473Sbmilekic MCLGET(mp, how); 53972473Sbmilekic if ((mp->m_flags & M_EXT) == 0) 54072473Sbmilekic goto failed; 54172473Sbmilekic } 54272473Sbmilekic 54372473Sbmilekic mp->m_len = 0; 54472473Sbmilekic len -= M_TRAILINGSPACE(mp); 54572473Sbmilekic } 54672473Sbmilekic 54772473Sbmilekic if (mtail != NULL) 54872473Sbmilekic mtail->m_next = top; 54972473Sbmilekic return (m); 55072473Sbmilekic 55172473Sbmilekicfailed: 55272473Sbmilekic m_freem(top); 55372473Sbmilekic return (NULL); 55472473Sbmilekic} 55572473Sbmilekic 5561541Srgrimesvoid 55772356Sbmilekicm_freem(struct mbuf *m) 5581541Srgrimes{ 55972356Sbmilekic struct mbuf *n; 5601541Srgrimes 5611541Srgrimes if (m == NULL) 5621541Srgrimes return; 5631541Srgrimes do { 56462587Sitojun /* 56562587Sitojun * we do need to check non-first mbuf, since some of existing 56662587Sitojun * code does not call M_PREPEND properly. 56762587Sitojun * (example: call to bpf_mtap from drivers) 56862587Sitojun */ 56962587Sitojun if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) { 57062587Sitojun m_freem(m->m_pkthdr.aux); 57162587Sitojun m->m_pkthdr.aux = NULL; 57262587Sitojun } 5731541Srgrimes MFREE(m, n); 5743308Sphk m = n; 5753308Sphk } while (m); 5761541Srgrimes} 5771541Srgrimes 5781541Srgrimes/* 5791541Srgrimes * Lesser-used path for M_PREPEND: 5801541Srgrimes * allocate new mbuf to prepend to chain, 5811541Srgrimes * copy junk along. 5821541Srgrimes */ 5831541Srgrimesstruct mbuf * 58472356Sbmilekicm_prepend(struct mbuf *m, int len, int how) 5851541Srgrimes{ 5861541Srgrimes struct mbuf *mn; 5871541Srgrimes 5881541Srgrimes MGET(mn, how, m->m_type); 58972356Sbmilekic if (mn == NULL) { 5901541Srgrimes m_freem(m); 59172356Sbmilekic return (NULL); 5921541Srgrimes } 5931541Srgrimes if (m->m_flags & M_PKTHDR) { 5941541Srgrimes M_COPY_PKTHDR(mn, m); 5951541Srgrimes m->m_flags &= ~M_PKTHDR; 5961541Srgrimes } 5971541Srgrimes mn->m_next = m; 5981541Srgrimes m = mn; 5991541Srgrimes if (len < MHLEN) 6001541Srgrimes MH_ALIGN(m, len); 6011541Srgrimes m->m_len = len; 6021541Srgrimes return (m); 6031541Srgrimes} 6041541Srgrimes 6051541Srgrimes/* 6061541Srgrimes * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 6071541Srgrimes * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 60870254Sbmilekic * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. 60954002Sarchie * Note that the copy is read-only, because clusters are not copied, 61054002Sarchie * only their reference counts are incremented. 6111541Srgrimes */ 6121541Srgrimesstruct mbuf * 61372356Sbmilekicm_copym(struct mbuf *m, int off0, int len, int wait) 6141541Srgrimes{ 61572356Sbmilekic struct mbuf *n, **np; 61672356Sbmilekic int off = off0; 6171541Srgrimes struct mbuf *top; 6181541Srgrimes int copyhdr = 0; 6191541Srgrimes 62052201Salfred KASSERT(off >= 0, ("m_copym, negative off %d", off)); 62152201Salfred KASSERT(len >= 0, ("m_copym, negative len %d", len)); 6221541Srgrimes if (off == 0 && m->m_flags & M_PKTHDR) 6231541Srgrimes copyhdr = 1; 6241541Srgrimes while (off > 0) { 62552201Salfred KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); 6261541Srgrimes if (off < m->m_len) 6271541Srgrimes break; 6281541Srgrimes off -= m->m_len; 6291541Srgrimes m = m->m_next; 6301541Srgrimes } 6311541Srgrimes np = ⊤ 6321541Srgrimes top = 0; 6331541Srgrimes while (len > 0) { 63472356Sbmilekic if (m == NULL) { 63552201Salfred KASSERT(len == M_COPYALL, 63652201Salfred ("m_copym, length > size of mbuf chain")); 6371541Srgrimes break; 6381541Srgrimes } 6391541Srgrimes MGET(n, wait, m->m_type); 6401541Srgrimes *np = n; 64172356Sbmilekic if (n == NULL) 6421541Srgrimes goto nospace; 6431541Srgrimes if (copyhdr) { 6441541Srgrimes M_COPY_PKTHDR(n, m); 6451541Srgrimes if (len == M_COPYALL) 6461541Srgrimes n->m_pkthdr.len -= off0; 6471541Srgrimes else 6481541Srgrimes n->m_pkthdr.len = len; 6491541Srgrimes copyhdr = 0; 6501541Srgrimes } 6511541Srgrimes n->m_len = min(len, m->m_len - off); 6521541Srgrimes if (m->m_flags & M_EXT) { 6531541Srgrimes n->m_data = m->m_data + off; 6541541Srgrimes n->m_ext = m->m_ext; 6551541Srgrimes n->m_flags |= M_EXT; 65664837Sdwmalone MEXT_ADD_REF(m); 6571541Srgrimes } else 6581541Srgrimes bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), 6591541Srgrimes (unsigned)n->m_len); 6601541Srgrimes if (len != M_COPYALL) 6611541Srgrimes len -= n->m_len; 6621541Srgrimes off = 0; 6631541Srgrimes m = m->m_next; 6641541Srgrimes np = &n->m_next; 6651541Srgrimes } 66675105Salfred if (top == NULL) { 66775105Salfred mtx_lock(&mbuf_mtx); 66875105Salfred mbstat.m_mcfail++; 66975105Salfred mtx_unlock(&mbuf_mtx); 67075105Salfred } 6711541Srgrimes return (top); 6721541Srgrimesnospace: 6731541Srgrimes m_freem(top); 67475105Salfred mtx_lock(&mbuf_mtx); 67575105Salfred mbstat.m_mcfail++; 67675105Salfred mtx_unlock(&mbuf_mtx); 67772356Sbmilekic return (NULL); 6781541Srgrimes} 6791541Srgrimes 6801541Srgrimes/* 68115689Swollman * Copy an entire packet, including header (which must be present). 68215689Swollman * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 68354002Sarchie * Note that the copy is read-only, because clusters are not copied, 68454002Sarchie * only their reference counts are incremented. 68572750Sluigi * Preserve alignment of the first mbuf so if the creator has left 68672750Sluigi * some room at the beginning (e.g. for inserting protocol headers) 68772750Sluigi * the copies still have the room available. 68815689Swollman */ 68915689Swollmanstruct mbuf * 69072356Sbmilekicm_copypacket(struct mbuf *m, int how) 69115689Swollman{ 69215689Swollman struct mbuf *top, *n, *o; 69315689Swollman 69415689Swollman MGET(n, how, m->m_type); 69515689Swollman top = n; 69672356Sbmilekic if (n == NULL) 69715689Swollman goto nospace; 69815689Swollman 69915689Swollman M_COPY_PKTHDR(n, m); 70015689Swollman n->m_len = m->m_len; 70115689Swollman if (m->m_flags & M_EXT) { 70215689Swollman n->m_data = m->m_data; 70315689Swollman n->m_ext = m->m_ext; 70415689Swollman n->m_flags |= M_EXT; 70564837Sdwmalone MEXT_ADD_REF(m); 70615689Swollman } else { 70772750Sluigi n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); 70815689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 70915689Swollman } 71015689Swollman 71115689Swollman m = m->m_next; 71215689Swollman while (m) { 71315689Swollman MGET(o, how, m->m_type); 71472356Sbmilekic if (o == NULL) 71515689Swollman goto nospace; 71615689Swollman 71715689Swollman n->m_next = o; 71815689Swollman n = n->m_next; 71915689Swollman 72015689Swollman n->m_len = m->m_len; 72115689Swollman if (m->m_flags & M_EXT) { 72215689Swollman n->m_data = m->m_data; 72315689Swollman n->m_ext = m->m_ext; 72415689Swollman n->m_flags |= M_EXT; 72564837Sdwmalone MEXT_ADD_REF(m); 72615689Swollman } else { 72715689Swollman bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 72815689Swollman } 72915689Swollman 73015689Swollman m = m->m_next; 73115689Swollman } 73215689Swollman return top; 73315689Swollmannospace: 73415689Swollman m_freem(top); 73575105Salfred mtx_lock(&mbuf_mtx); 73675105Salfred mbstat.m_mcfail++; 73775105Salfred mtx_unlock(&mbuf_mtx); 73872356Sbmilekic return (NULL); 73915689Swollman} 74015689Swollman 74115689Swollman/* 7421541Srgrimes * Copy data from an mbuf chain starting "off" bytes from the beginning, 7431541Srgrimes * continuing for "len" bytes, into the indicated buffer. 7441541Srgrimes */ 7451549Srgrimesvoid 74672356Sbmilekicm_copydata(struct mbuf *m, int off, int len, caddr_t cp) 7471541Srgrimes{ 74872356Sbmilekic unsigned count; 7491541Srgrimes 75052201Salfred KASSERT(off >= 0, ("m_copydata, negative off %d", off)); 75152201Salfred KASSERT(len >= 0, ("m_copydata, negative len %d", len)); 7521541Srgrimes while (off > 0) { 75352201Salfred KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); 7541541Srgrimes if (off < m->m_len) 7551541Srgrimes break; 7561541Srgrimes off -= m->m_len; 7571541Srgrimes m = m->m_next; 7581541Srgrimes } 7591541Srgrimes while (len > 0) { 76052201Salfred KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); 7611541Srgrimes count = min(m->m_len - off, len); 7621541Srgrimes bcopy(mtod(m, caddr_t) + off, cp, count); 7631541Srgrimes len -= count; 7641541Srgrimes cp += count; 7651541Srgrimes off = 0; 7661541Srgrimes m = m->m_next; 7671541Srgrimes } 7681541Srgrimes} 7691541Srgrimes 7701541Srgrimes/* 77154002Sarchie * Copy a packet header mbuf chain into a completely new chain, including 77254002Sarchie * copying any mbuf clusters. Use this instead of m_copypacket() when 77354002Sarchie * you need a writable copy of an mbuf chain. 77454002Sarchie */ 77554002Sarchiestruct mbuf * 77672356Sbmilekicm_dup(struct mbuf *m, int how) 77754002Sarchie{ 77854002Sarchie struct mbuf **p, *top = NULL; 77954002Sarchie int remain, moff, nsize; 78054002Sarchie 78154002Sarchie /* Sanity check */ 78254002Sarchie if (m == NULL) 78372356Sbmilekic return (NULL); 78454002Sarchie KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__)); 78554002Sarchie 78654002Sarchie /* While there's more data, get a new mbuf, tack it on, and fill it */ 78754002Sarchie remain = m->m_pkthdr.len; 78854002Sarchie moff = 0; 78954002Sarchie p = ⊤ 79054002Sarchie while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ 79154002Sarchie struct mbuf *n; 79254002Sarchie 79354002Sarchie /* Get the next new mbuf */ 79454002Sarchie MGET(n, how, m->m_type); 79554002Sarchie if (n == NULL) 79654002Sarchie goto nospace; 79754002Sarchie if (top == NULL) { /* first one, must be PKTHDR */ 79854002Sarchie M_COPY_PKTHDR(n, m); 79954002Sarchie nsize = MHLEN; 80054002Sarchie } else /* not the first one */ 80154002Sarchie nsize = MLEN; 80254002Sarchie if (remain >= MINCLSIZE) { 80354002Sarchie MCLGET(n, how); 80454002Sarchie if ((n->m_flags & M_EXT) == 0) { 80554002Sarchie (void)m_free(n); 80654002Sarchie goto nospace; 80754002Sarchie } 80854002Sarchie nsize = MCLBYTES; 80954002Sarchie } 81054002Sarchie n->m_len = 0; 81154002Sarchie 81254002Sarchie /* Link it into the new chain */ 81354002Sarchie *p = n; 81454002Sarchie p = &n->m_next; 81554002Sarchie 81654002Sarchie /* Copy data from original mbuf(s) into new mbuf */ 81754002Sarchie while (n->m_len < nsize && m != NULL) { 81854002Sarchie int chunk = min(nsize - n->m_len, m->m_len - moff); 81954002Sarchie 82054002Sarchie bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 82154002Sarchie moff += chunk; 82254002Sarchie n->m_len += chunk; 82354002Sarchie remain -= chunk; 82454002Sarchie if (moff == m->m_len) { 82554002Sarchie m = m->m_next; 82654002Sarchie moff = 0; 82754002Sarchie } 82854002Sarchie } 82954002Sarchie 83054002Sarchie /* Check correct total mbuf length */ 83154002Sarchie KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), 83254002Sarchie ("%s: bogus m_pkthdr.len", __FUNCTION__)); 83354002Sarchie } 83454002Sarchie return (top); 83554002Sarchie 83654002Sarchienospace: 83754002Sarchie m_freem(top); 83875105Salfred mtx_lock(&mbuf_mtx); 83975105Salfred mbstat.m_mcfail++; 84075105Salfred mtx_unlock(&mbuf_mtx); 84172356Sbmilekic return (NULL); 84254002Sarchie} 84354002Sarchie 84454002Sarchie/* 8451541Srgrimes * Concatenate mbuf chain n to m. 8461541Srgrimes * Both chains must be of the same type (e.g. MT_DATA). 8471541Srgrimes * Any m_pkthdr is not updated. 8481541Srgrimes */ 8491549Srgrimesvoid 85072356Sbmilekicm_cat(struct mbuf *m, struct mbuf *n) 8511541Srgrimes{ 8521541Srgrimes while (m->m_next) 8531541Srgrimes m = m->m_next; 8541541Srgrimes while (n) { 8551541Srgrimes if (m->m_flags & M_EXT || 8561541Srgrimes m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { 8571541Srgrimes /* just join the two chains */ 8581541Srgrimes m->m_next = n; 8591541Srgrimes return; 8601541Srgrimes } 8611541Srgrimes /* splat the data from one into the other */ 8621541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 8631541Srgrimes (u_int)n->m_len); 8641541Srgrimes m->m_len += n->m_len; 8651541Srgrimes n = m_free(n); 8661541Srgrimes } 8671541Srgrimes} 8681541Srgrimes 8691549Srgrimesvoid 87072356Sbmilekicm_adj(struct mbuf *mp, int req_len) 8711541Srgrimes{ 87272356Sbmilekic int len = req_len; 87372356Sbmilekic struct mbuf *m; 87472356Sbmilekic int count; 8751541Srgrimes 8761541Srgrimes if ((m = mp) == NULL) 8771541Srgrimes return; 8781541Srgrimes if (len >= 0) { 8791541Srgrimes /* 8801541Srgrimes * Trim from head. 8811541Srgrimes */ 8821541Srgrimes while (m != NULL && len > 0) { 8831541Srgrimes if (m->m_len <= len) { 8841541Srgrimes len -= m->m_len; 8851541Srgrimes m->m_len = 0; 8861541Srgrimes m = m->m_next; 8871541Srgrimes } else { 8881541Srgrimes m->m_len -= len; 8891541Srgrimes m->m_data += len; 8901541Srgrimes len = 0; 8911541Srgrimes } 8921541Srgrimes } 8931541Srgrimes m = mp; 8941541Srgrimes if (mp->m_flags & M_PKTHDR) 8951541Srgrimes m->m_pkthdr.len -= (req_len - len); 8961541Srgrimes } else { 8971541Srgrimes /* 8981541Srgrimes * Trim from tail. Scan the mbuf chain, 8991541Srgrimes * calculating its length and finding the last mbuf. 9001541Srgrimes * If the adjustment only affects this mbuf, then just 9011541Srgrimes * adjust and return. Otherwise, rescan and truncate 9021541Srgrimes * after the remaining size. 9031541Srgrimes */ 9041541Srgrimes len = -len; 9051541Srgrimes count = 0; 9061541Srgrimes for (;;) { 9071541Srgrimes count += m->m_len; 9081541Srgrimes if (m->m_next == (struct mbuf *)0) 9091541Srgrimes break; 9101541Srgrimes m = m->m_next; 9111541Srgrimes } 9121541Srgrimes if (m->m_len >= len) { 9131541Srgrimes m->m_len -= len; 9141541Srgrimes if (mp->m_flags & M_PKTHDR) 9151541Srgrimes mp->m_pkthdr.len -= len; 9161541Srgrimes return; 9171541Srgrimes } 9181541Srgrimes count -= len; 9191541Srgrimes if (count < 0) 9201541Srgrimes count = 0; 9211541Srgrimes /* 9221541Srgrimes * Correct length for chain is "count". 9231541Srgrimes * Find the mbuf with last data, adjust its length, 9241541Srgrimes * and toss data from remaining mbufs on chain. 9251541Srgrimes */ 9261541Srgrimes m = mp; 9271541Srgrimes if (m->m_flags & M_PKTHDR) 9281541Srgrimes m->m_pkthdr.len = count; 9291541Srgrimes for (; m; m = m->m_next) { 9301541Srgrimes if (m->m_len >= count) { 9311541Srgrimes m->m_len = count; 9321541Srgrimes break; 9331541Srgrimes } 9341541Srgrimes count -= m->m_len; 9351541Srgrimes } 9363308Sphk while (m->m_next) 9373308Sphk (m = m->m_next) ->m_len = 0; 9381541Srgrimes } 9391541Srgrimes} 9401541Srgrimes 9411541Srgrimes/* 9421541Srgrimes * Rearange an mbuf chain so that len bytes are contiguous 9431541Srgrimes * and in the data area of an mbuf (so that mtod and dtom 9441541Srgrimes * will work for a structure of size len). Returns the resulting 9451541Srgrimes * mbuf chain on success, frees it and returns null on failure. 9461541Srgrimes * If there is room, it will add up to max_protohdr-len extra bytes to the 9471541Srgrimes * contiguous region in an attempt to avoid being called next time. 9481541Srgrimes */ 9491541Srgrimesstruct mbuf * 95072356Sbmilekicm_pullup(struct mbuf *n, int len) 9511541Srgrimes{ 95272356Sbmilekic struct mbuf *m; 95372356Sbmilekic int count; 9541541Srgrimes int space; 9551541Srgrimes 9561541Srgrimes /* 9571541Srgrimes * If first mbuf has no cluster, and has room for len bytes 9581541Srgrimes * without shifting current data, pullup into it, 9591541Srgrimes * otherwise allocate a new mbuf to prepend to the chain. 9601541Srgrimes */ 9611541Srgrimes if ((n->m_flags & M_EXT) == 0 && 9621541Srgrimes n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 9631541Srgrimes if (n->m_len >= len) 9641541Srgrimes return (n); 9651541Srgrimes m = n; 9661541Srgrimes n = n->m_next; 9671541Srgrimes len -= m->m_len; 9681541Srgrimes } else { 9691541Srgrimes if (len > MHLEN) 9701541Srgrimes goto bad; 9711541Srgrimes MGET(m, M_DONTWAIT, n->m_type); 97272356Sbmilekic if (m == NULL) 9731541Srgrimes goto bad; 9741541Srgrimes m->m_len = 0; 9751541Srgrimes if (n->m_flags & M_PKTHDR) { 9761541Srgrimes M_COPY_PKTHDR(m, n); 9771541Srgrimes n->m_flags &= ~M_PKTHDR; 9781541Srgrimes } 9791541Srgrimes } 9801541Srgrimes space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 9811541Srgrimes do { 9821541Srgrimes count = min(min(max(len, max_protohdr), space), n->m_len); 9831541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 9841541Srgrimes (unsigned)count); 9851541Srgrimes len -= count; 9861541Srgrimes m->m_len += count; 9871541Srgrimes n->m_len -= count; 9881541Srgrimes space -= count; 9891541Srgrimes if (n->m_len) 9901541Srgrimes n->m_data += count; 9911541Srgrimes else 9921541Srgrimes n = m_free(n); 9931541Srgrimes } while (len > 0 && n); 9941541Srgrimes if (len > 0) { 9951541Srgrimes (void) m_free(m); 9961541Srgrimes goto bad; 9971541Srgrimes } 9981541Srgrimes m->m_next = n; 9991541Srgrimes return (m); 10001541Srgrimesbad: 10011541Srgrimes m_freem(n); 100275105Salfred mtx_lock(&mbuf_mtx); 100375105Salfred mbstat.m_mcfail++; 100475105Salfred mtx_unlock(&mbuf_mtx); 100572356Sbmilekic return (NULL); 10061541Srgrimes} 10071541Srgrimes 10081541Srgrimes/* 10091541Srgrimes * Partition an mbuf chain in two pieces, returning the tail -- 10101541Srgrimes * all but the first len0 bytes. In case of failure, it returns NULL and 10111541Srgrimes * attempts to restore the chain to its original state. 10121541Srgrimes */ 10131541Srgrimesstruct mbuf * 101472356Sbmilekicm_split(struct mbuf *m0, int len0, int wait) 10151541Srgrimes{ 101672356Sbmilekic struct mbuf *m, *n; 10171541Srgrimes unsigned len = len0, remain; 10181541Srgrimes 10191541Srgrimes for (m = m0; m && len > m->m_len; m = m->m_next) 10201541Srgrimes len -= m->m_len; 102172356Sbmilekic if (m == NULL) 102272356Sbmilekic return (NULL); 10231541Srgrimes remain = m->m_len - len; 10241541Srgrimes if (m0->m_flags & M_PKTHDR) { 10251541Srgrimes MGETHDR(n, wait, m0->m_type); 102672356Sbmilekic if (n == NULL) 102772356Sbmilekic return (NULL); 10281541Srgrimes n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 10291541Srgrimes n->m_pkthdr.len = m0->m_pkthdr.len - len0; 10301541Srgrimes m0->m_pkthdr.len = len0; 10311541Srgrimes if (m->m_flags & M_EXT) 10321541Srgrimes goto extpacket; 10331541Srgrimes if (remain > MHLEN) { 10341541Srgrimes /* m can't be the lead packet */ 10351541Srgrimes MH_ALIGN(n, 0); 10361541Srgrimes n->m_next = m_split(m, len, wait); 103772356Sbmilekic if (n->m_next == NULL) { 10381541Srgrimes (void) m_free(n); 103972356Sbmilekic return (NULL); 10401541Srgrimes } else 10411541Srgrimes return (n); 10421541Srgrimes } else 10431541Srgrimes MH_ALIGN(n, remain); 10441541Srgrimes } else if (remain == 0) { 10451541Srgrimes n = m->m_next; 104672356Sbmilekic m->m_next = NULL; 10471541Srgrimes return (n); 10481541Srgrimes } else { 10491541Srgrimes MGET(n, wait, m->m_type); 105072356Sbmilekic if (n == NULL) 105172356Sbmilekic return (NULL); 10521541Srgrimes M_ALIGN(n, remain); 10531541Srgrimes } 10541541Srgrimesextpacket: 10551541Srgrimes if (m->m_flags & M_EXT) { 10561541Srgrimes n->m_flags |= M_EXT; 10571541Srgrimes n->m_ext = m->m_ext; 105864837Sdwmalone MEXT_ADD_REF(m); 10591541Srgrimes m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ 10601541Srgrimes n->m_data = m->m_data + len; 10611541Srgrimes } else { 10621541Srgrimes bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 10631541Srgrimes } 10641541Srgrimes n->m_len = remain; 10651541Srgrimes m->m_len = len; 10661541Srgrimes n->m_next = m->m_next; 106772356Sbmilekic m->m_next = NULL; 10681541Srgrimes return (n); 10691541Srgrimes} 10701541Srgrimes/* 10711541Srgrimes * Routine to copy from device local memory into mbufs. 10721541Srgrimes */ 10731541Srgrimesstruct mbuf * 107472356Sbmilekicm_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 107572356Sbmilekic void (*copy)(char *from, caddr_t to, u_int len)) 10761541Srgrimes{ 107772356Sbmilekic struct mbuf *m; 10781541Srgrimes struct mbuf *top = 0, **mp = ⊤ 107972356Sbmilekic int off = off0, len; 108072356Sbmilekic char *cp; 10811541Srgrimes char *epkt; 10821541Srgrimes 10831541Srgrimes cp = buf; 10841541Srgrimes epkt = cp + totlen; 10851541Srgrimes if (off) { 10861541Srgrimes cp += off + 2 * sizeof(u_short); 10871541Srgrimes totlen -= 2 * sizeof(u_short); 10881541Srgrimes } 10891541Srgrimes MGETHDR(m, M_DONTWAIT, MT_DATA); 109072356Sbmilekic if (m == NULL) 109172356Sbmilekic return (NULL); 10921541Srgrimes m->m_pkthdr.rcvif = ifp; 10931541Srgrimes m->m_pkthdr.len = totlen; 10941541Srgrimes m->m_len = MHLEN; 10951541Srgrimes 10961541Srgrimes while (totlen > 0) { 10971541Srgrimes if (top) { 10981541Srgrimes MGET(m, M_DONTWAIT, MT_DATA); 109972356Sbmilekic if (m == NULL) { 11001541Srgrimes m_freem(top); 110172356Sbmilekic return (NULL); 11021541Srgrimes } 11031541Srgrimes m->m_len = MLEN; 11041541Srgrimes } 11051541Srgrimes len = min(totlen, epkt - cp); 11061541Srgrimes if (len >= MINCLSIZE) { 11071541Srgrimes MCLGET(m, M_DONTWAIT); 11081541Srgrimes if (m->m_flags & M_EXT) 11091541Srgrimes m->m_len = len = min(len, MCLBYTES); 11101541Srgrimes else 11111541Srgrimes len = m->m_len; 11121541Srgrimes } else { 11131541Srgrimes /* 11141541Srgrimes * Place initial small packet/header at end of mbuf. 11151541Srgrimes */ 11161541Srgrimes if (len < m->m_len) { 111772356Sbmilekic if (top == NULL && len + 111872356Sbmilekic max_linkhdr <= m->m_len) 11191541Srgrimes m->m_data += max_linkhdr; 11201541Srgrimes m->m_len = len; 11211541Srgrimes } else 11221541Srgrimes len = m->m_len; 11231541Srgrimes } 11241541Srgrimes if (copy) 11251541Srgrimes copy(cp, mtod(m, caddr_t), (unsigned)len); 11261541Srgrimes else 11271541Srgrimes bcopy(cp, mtod(m, caddr_t), (unsigned)len); 11281541Srgrimes cp += len; 11291541Srgrimes *mp = m; 11301541Srgrimes mp = &m->m_next; 11311541Srgrimes totlen -= len; 11321541Srgrimes if (cp == epkt) 11331541Srgrimes cp = buf; 11341541Srgrimes } 11351541Srgrimes return (top); 11361541Srgrimes} 11373352Sphk 11383352Sphk/* 11393352Sphk * Copy data from a buffer back into the indicated mbuf chain, 11403352Sphk * starting "off" bytes from the beginning, extending the mbuf 11413352Sphk * chain if necessary. 11423352Sphk */ 11433352Sphkvoid 114472356Sbmilekicm_copyback(struct mbuf *m0, int off, int len, caddr_t cp) 11453352Sphk{ 114672356Sbmilekic int mlen; 114772356Sbmilekic struct mbuf *m = m0, *n; 11483352Sphk int totlen = 0; 11493352Sphk 115072356Sbmilekic if (m0 == NULL) 11513352Sphk return; 11523352Sphk while (off > (mlen = m->m_len)) { 11533352Sphk off -= mlen; 11543352Sphk totlen += mlen; 115572356Sbmilekic if (m->m_next == NULL) { 11563352Sphk n = m_getclr(M_DONTWAIT, m->m_type); 115772356Sbmilekic if (n == NULL) 11583352Sphk goto out; 11593352Sphk n->m_len = min(MLEN, len + off); 11603352Sphk m->m_next = n; 11613352Sphk } 11623352Sphk m = m->m_next; 11633352Sphk } 11643352Sphk while (len > 0) { 11653352Sphk mlen = min (m->m_len - off, len); 11663352Sphk bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); 11673352Sphk cp += mlen; 11683352Sphk len -= mlen; 11693352Sphk mlen += off; 11703352Sphk off = 0; 11713352Sphk totlen += mlen; 11723352Sphk if (len == 0) 11733352Sphk break; 117472356Sbmilekic if (m->m_next == NULL) { 11753352Sphk n = m_get(M_DONTWAIT, m->m_type); 117672356Sbmilekic if (n == NULL) 11773352Sphk break; 11783352Sphk n->m_len = min(MLEN, len); 11793352Sphk m->m_next = n; 11803352Sphk } 11813352Sphk m = m->m_next; 11823352Sphk } 11833352Sphkout: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 11843352Sphk m->m_pkthdr.len = totlen; 11853352Sphk} 118652756Sphk 118752756Sphkvoid 118852756Sphkm_print(const struct mbuf *m) 118952756Sphk{ 119052756Sphk int len; 119154906Seivind const struct mbuf *m2; 119252756Sphk 119352756Sphk len = m->m_pkthdr.len; 119452756Sphk m2 = m; 119552756Sphk while (len) { 119652756Sphk printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); 119752756Sphk len -= m2->m_len; 119852756Sphk m2 = m2->m_next; 119952756Sphk } 120052756Sphk return; 120152756Sphk} 1202