/* uipc_mbuf.c revision 76166 */
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/uipc_mbuf.c 76166 2001-05-01 08:13:21Z markm $
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit(void *);
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

/* Base of the mbuf submap and the sysctl-exported statistics record. */
struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];	/* per-type allocation counters */
int	max_linkhdr;		/* largest link-level header */
int	max_protohdr;		/* largest protocol header */
int	max_hdr;		/* largest link + protocol header */
int	max_datalen;		/* MHLEN - max_hdr */
int	nmbclusters;		/* cap on mbuf clusters (tunable) */
int	nmbufs;			/* cap on mbufs (tunable) */
int	nmbcnt;			/* cap on ext_buf ref counters (tunable) */
/* Counts of threads sleeping for an mbuf / cluster, respectively. */
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;

/*
 * freelist header structures...
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;
/* Single mutex protecting all three free lists above. */
struct mtx mbuf_mtx;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
    &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
    &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
    &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
    &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
    sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
    "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
    "Maximum number of ext_buf counters available");

#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif

/* Boot-time tunables; defaults derive from NMBCLUSTERS. */
TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);
static void m_reclaim(void);

/* Initial allocation numbers */
#define NCL_INIT	2
#define NMB_INIT	16
#define REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization done here.
 *
 * XXX: If ever we have system specific map setups to do, then move them to
 *      machdep.c - for now, there is no reason for this stuff to go there.
 *
 * Sets up the mb_map submap, the free list heads and their lock/condvars,
 * the exported mbstat constants, and performs the initial allocations of
 * ref counters, mbufs, and clusters; panics on any initial failure.
 */
static void
mbinit(void *dummy)
{
	vm_offset_t maxaddr;
	vm_size_t mb_map_size;

	/*
	 * Setup the mb_map, allocate requested VM space.
	 * The map is sized to hold the worst case of mbufs, clusters, and
	 * external-buffer reference counters, rounded down to a page.
	 */
	mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
	    nmbcnt * sizeof(union mext_refcnt));
	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX XXX XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and setup locks for lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 * The allocators below expect mbuf_mtx to be held on entry.
	 */
	mtx_lock(&mbuf_mtx);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_unlock(&mbuf_mtx);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 * Returns 1 on success, 0 on allocation failure.
 */
int
m_alloc_ref(u_int nmb, int how)
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accomodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * succesfully, as long as there is physical memory to accomodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	/* Drop the list mutex around kmem_malloc(), which may sleep. */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT)) == NULL) {
		if (1 /* XXX: how == M_TRYWAIT */)
			mtx_lock(&mbuf_mtx);
		return (0);
	}
	/* round_page() may have given us more room than requested. */
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}
/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 * Returns 1 on success, 0 if the map is full or memory is exhausted.
 */
int
m_mballoc(int nmb, int how)
{
	caddr_t p;
	int i;
	int nbytes;

	nbytes = round_page(nmb * MSIZE);
	nmb = nbytes / MSIZE;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
		return (0);

	/* Drop the list mutex around kmem_malloc(), which may sleep. */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
	    M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */) {
		mtx_lock(&mbuf_mtx);
		if (p == NULL)
			mbstat.m_wait++;
	}

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}

/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
 * rely solely on reclaimed mbufs.
 *
 * Here we request for the protocols to free up some resources and, if we
 * still cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time, at most.
 *
 * Must be called with the mmbfree mutex held.
 * Returns an mbuf on success or NULL if none could be reclaimed in time.
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 * We drop the mmbfree mutex to avoid recursing into it in some of
	 * the drain routines. Clearly, we're faced with a race here because
	 * once something is freed during the drain, it may be grabbed right
	 * from under us by some other thread. But we accept this possibility
	 * in order to avoid a potentially large lock recursion and, more
	 * importantly, to avoid a potential lock order reversal which may
	 * result in deadlock (See comment above m_reclaim()).
	 */
	mtx_unlock(&mbuf_mtx);
	m_reclaim();

	mtx_lock(&mbuf_mtx);
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		int retval;

		/* Announce ourselves as a waiter and sleep on the cv. */
		m_mballoc_wid++;
		retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
		    mbuf_wait);
		m_mballoc_wid--;

		/*
		 * If we got signaled (i.e. didn't time out), allocate.
		 */
		if (retval == 0)
			_MGET(p, M_DONTWAIT);
	}

	if (p != NULL) {
		mbstat.m_wait++;
		/* More mbufs remain: wake another starved waiter, if any. */
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
	}

	return (p);
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 * Returns 1 on success, 0 if the map is full or memory is exhausted.
 */
int
m_clalloc(int ncl, int how)
{
	caddr_t p;
	int i;
	int npg_sz;

	npg_sz = round_page(ncl * MCLBYTES);
	ncl = npg_sz / MCLBYTES;

	/*
	 * If the map is now full (nothing will ever be freed to it).
	 * If we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
		return (0);

	/* Drop the list mutex around kmem_malloc(), which may sleep. */
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_unlock(&mbuf_mtx);
	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
	if (1 /* XXX: how == M_TRYWAIT */)
		mtx_lock(&mbuf_mtx);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
 * block on a cv for a designated amount of time (mbuf_wait) or until we're
 * signaled due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 * Returns a cluster pointer on success or NULL on timeout.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;
	int retval;

	/* Announce ourselves as a waiter and sleep on the cv. */
	m_clalloc_wid++;
	retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
	m_clalloc_wid--;

	/*
	 * Now that we (think) that we've got something, try again.
	 */
	if (retval == 0)
		_MCLALLOC(p, M_DONTWAIT);

	if (p != NULL) {
		mbstat.m_wait++;
		/* More clusters remain: wake another starved waiter. */
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
	}

	return (p);
}

/*
 * m_reclaim: drain protocols in hopes to free up some resources...
 *
 * XXX: No locks should be held going in here. The drain routines have
 * to presently acquire some locks which raises the possibility of lock
 * order violation if we're holding any mutex if that mutex is acquired in
 * reverse order relative to one of the locks in the drain routines.
 */
static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

#ifdef WITNESS
	KASSERT(witness_list(curproc) == 0,
	    ("m_reclaim called with locks held"));
#endif

	/* Invoke every registered protocol drain routine. */
	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}
/*
 * Space allocation routines.
 * Some of these are also available as macros
 * for critical paths.
 */

/* Allocate a single mbuf of the given type; wrapper around MGET(). */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

/* Allocate a packet-header mbuf of the given type; wrapper around MGETHDR(). */
struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

/* Allocate an mbuf and zero its data area (MLEN bytes). */
struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *m;

	MGET(m, how, type);
	if (m != NULL)
		bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

/* Free a single mbuf; returns the next mbuf in the chain (may be NULL). */
struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

/*
 * struct mbuf *
 * m_getm(m, len, how, type)
 *
 * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain. If m is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 *
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, int type)
{
	struct mbuf *top, *tail, *mp, *mtail = NULL;

	KASSERT(len >= 0, ("len is < 0 in m_getm"));

	/* First mbuf of the new chain; add a cluster if len warrants it. */
	MGET(mp, how, type);
	if (mp == NULL)
		return (NULL);
	else if (len > MINCLSIZE) {
		MCLGET(mp, how);
		if ((mp->m_flags & M_EXT) == 0) {
			m_free(mp);
			return (NULL);
		}
	}
	mp->m_len = 0;
	len -= M_TRAILINGSPACE(mp);

	/* Find the tail of the caller's chain, if one was supplied. */
	if (m != NULL)
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
	else
		m = mp;

	top = tail = mp;
	while (len > 0) {
		MGET(mp, how, type);
		if (mp == NULL)
			goto failed;

		tail->m_next = mp;
		tail = mp;
		if (len > MINCLSIZE) {
			MCLGET(mp, how);
			if ((mp->m_flags & M_EXT) == 0)
				goto failed;
		}

		mp->m_len = 0;
		len -= M_TRAILINGSPACE(mp);
	}

	/* Append the new chain to the caller's chain, if any. */
	if (mtail != NULL)
		mtail->m_next = top;
	return (m);

failed:
	/* Undo everything allocated so far; the caller's chain is untouched. */
	m_freem(top);
	return (NULL);
}

/* Free an entire mbuf chain, including any attached pkthdr.aux chains. */
void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * we do need to check non-first mbuf, since some of existing
		 * code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 *
 * On allocation failure the entire chain `m' is freed and NULL returned.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	/* Migrate the packet header, if present, to the new first mbuf. */
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	/* Skip to the mbuf containing offset off0. */
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			/* Share the cluster: bump its ref count, no copy. */
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL) {
		mtx_lock(&mbuf_mtx);
		mbstat.m_mcfail++;
		mtx_unlock(&mbuf_mtx);
	}
	return (top);
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		/* Share the cluster: bump its ref count, no copy. */
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		/* Preserve the original data offset within the first mbuf. */
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	/* Skip to the mbuf containing the starting offset. */
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	mtx_lock(&mbuf_mtx);
	mbstat.m_mcfail++;
	mtx_unlock(&mbuf_mtx);
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
8841541Srgrimes */ 8851541Srgrimes while (m != NULL && len > 0) { 8861541Srgrimes if (m->m_len <= len) { 8871541Srgrimes len -= m->m_len; 8881541Srgrimes m->m_len = 0; 8891541Srgrimes m = m->m_next; 8901541Srgrimes } else { 8911541Srgrimes m->m_len -= len; 8921541Srgrimes m->m_data += len; 8931541Srgrimes len = 0; 8941541Srgrimes } 8951541Srgrimes } 8961541Srgrimes m = mp; 8971541Srgrimes if (mp->m_flags & M_PKTHDR) 8981541Srgrimes m->m_pkthdr.len -= (req_len - len); 8991541Srgrimes } else { 9001541Srgrimes /* 9011541Srgrimes * Trim from tail. Scan the mbuf chain, 9021541Srgrimes * calculating its length and finding the last mbuf. 9031541Srgrimes * If the adjustment only affects this mbuf, then just 9041541Srgrimes * adjust and return. Otherwise, rescan and truncate 9051541Srgrimes * after the remaining size. 9061541Srgrimes */ 9071541Srgrimes len = -len; 9081541Srgrimes count = 0; 9091541Srgrimes for (;;) { 9101541Srgrimes count += m->m_len; 9111541Srgrimes if (m->m_next == (struct mbuf *)0) 9121541Srgrimes break; 9131541Srgrimes m = m->m_next; 9141541Srgrimes } 9151541Srgrimes if (m->m_len >= len) { 9161541Srgrimes m->m_len -= len; 9171541Srgrimes if (mp->m_flags & M_PKTHDR) 9181541Srgrimes mp->m_pkthdr.len -= len; 9191541Srgrimes return; 9201541Srgrimes } 9211541Srgrimes count -= len; 9221541Srgrimes if (count < 0) 9231541Srgrimes count = 0; 9241541Srgrimes /* 9251541Srgrimes * Correct length for chain is "count". 9261541Srgrimes * Find the mbuf with last data, adjust its length, 9271541Srgrimes * and toss data from remaining mbufs on chain. 
9281541Srgrimes */ 9291541Srgrimes m = mp; 9301541Srgrimes if (m->m_flags & M_PKTHDR) 9311541Srgrimes m->m_pkthdr.len = count; 9321541Srgrimes for (; m; m = m->m_next) { 9331541Srgrimes if (m->m_len >= count) { 9341541Srgrimes m->m_len = count; 9351541Srgrimes break; 9361541Srgrimes } 9371541Srgrimes count -= m->m_len; 9381541Srgrimes } 9393308Sphk while (m->m_next) 9403308Sphk (m = m->m_next) ->m_len = 0; 9411541Srgrimes } 9421541Srgrimes} 9431541Srgrimes 9441541Srgrimes/* 9451541Srgrimes * Rearange an mbuf chain so that len bytes are contiguous 9461541Srgrimes * and in the data area of an mbuf (so that mtod and dtom 9471541Srgrimes * will work for a structure of size len). Returns the resulting 9481541Srgrimes * mbuf chain on success, frees it and returns null on failure. 9491541Srgrimes * If there is room, it will add up to max_protohdr-len extra bytes to the 9501541Srgrimes * contiguous region in an attempt to avoid being called next time. 9511541Srgrimes */ 9521541Srgrimesstruct mbuf * 95372356Sbmilekicm_pullup(struct mbuf *n, int len) 9541541Srgrimes{ 95572356Sbmilekic struct mbuf *m; 95672356Sbmilekic int count; 9571541Srgrimes int space; 9581541Srgrimes 9591541Srgrimes /* 9601541Srgrimes * If first mbuf has no cluster, and has room for len bytes 9611541Srgrimes * without shifting current data, pullup into it, 9621541Srgrimes * otherwise allocate a new mbuf to prepend to the chain. 
9631541Srgrimes */ 9641541Srgrimes if ((n->m_flags & M_EXT) == 0 && 9651541Srgrimes n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 9661541Srgrimes if (n->m_len >= len) 9671541Srgrimes return (n); 9681541Srgrimes m = n; 9691541Srgrimes n = n->m_next; 9701541Srgrimes len -= m->m_len; 9711541Srgrimes } else { 9721541Srgrimes if (len > MHLEN) 9731541Srgrimes goto bad; 9741541Srgrimes MGET(m, M_DONTWAIT, n->m_type); 97572356Sbmilekic if (m == NULL) 9761541Srgrimes goto bad; 9771541Srgrimes m->m_len = 0; 9781541Srgrimes if (n->m_flags & M_PKTHDR) { 9791541Srgrimes M_COPY_PKTHDR(m, n); 9801541Srgrimes n->m_flags &= ~M_PKTHDR; 9811541Srgrimes } 9821541Srgrimes } 9831541Srgrimes space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 9841541Srgrimes do { 9851541Srgrimes count = min(min(max(len, max_protohdr), space), n->m_len); 9861541Srgrimes bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 9871541Srgrimes (unsigned)count); 9881541Srgrimes len -= count; 9891541Srgrimes m->m_len += count; 9901541Srgrimes n->m_len -= count; 9911541Srgrimes space -= count; 9921541Srgrimes if (n->m_len) 9931541Srgrimes n->m_data += count; 9941541Srgrimes else 9951541Srgrimes n = m_free(n); 9961541Srgrimes } while (len > 0 && n); 9971541Srgrimes if (len > 0) { 9981541Srgrimes (void) m_free(m); 9991541Srgrimes goto bad; 10001541Srgrimes } 10011541Srgrimes m->m_next = n; 10021541Srgrimes return (m); 10031541Srgrimesbad: 10041541Srgrimes m_freem(n); 100575105Salfred mtx_lock(&mbuf_mtx); 100675105Salfred mbstat.m_mcfail++; 100775105Salfred mtx_unlock(&mbuf_mtx); 100872356Sbmilekic return (NULL); 10091541Srgrimes} 10101541Srgrimes 10111541Srgrimes/* 10121541Srgrimes * Partition an mbuf chain in two pieces, returning the tail -- 10131541Srgrimes * all but the first len0 bytes. In case of failure, it returns NULL and 10141541Srgrimes * attempts to restore the chain to its original state. 
10151541Srgrimes */ 10161541Srgrimesstruct mbuf * 101772356Sbmilekicm_split(struct mbuf *m0, int len0, int wait) 10181541Srgrimes{ 101972356Sbmilekic struct mbuf *m, *n; 10201541Srgrimes unsigned len = len0, remain; 10211541Srgrimes 10221541Srgrimes for (m = m0; m && len > m->m_len; m = m->m_next) 10231541Srgrimes len -= m->m_len; 102472356Sbmilekic if (m == NULL) 102572356Sbmilekic return (NULL); 10261541Srgrimes remain = m->m_len - len; 10271541Srgrimes if (m0->m_flags & M_PKTHDR) { 10281541Srgrimes MGETHDR(n, wait, m0->m_type); 102972356Sbmilekic if (n == NULL) 103072356Sbmilekic return (NULL); 10311541Srgrimes n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 10321541Srgrimes n->m_pkthdr.len = m0->m_pkthdr.len - len0; 10331541Srgrimes m0->m_pkthdr.len = len0; 10341541Srgrimes if (m->m_flags & M_EXT) 10351541Srgrimes goto extpacket; 10361541Srgrimes if (remain > MHLEN) { 10371541Srgrimes /* m can't be the lead packet */ 10381541Srgrimes MH_ALIGN(n, 0); 10391541Srgrimes n->m_next = m_split(m, len, wait); 104072356Sbmilekic if (n->m_next == NULL) { 10411541Srgrimes (void) m_free(n); 104272356Sbmilekic return (NULL); 10431541Srgrimes } else 10441541Srgrimes return (n); 10451541Srgrimes } else 10461541Srgrimes MH_ALIGN(n, remain); 10471541Srgrimes } else if (remain == 0) { 10481541Srgrimes n = m->m_next; 104972356Sbmilekic m->m_next = NULL; 10501541Srgrimes return (n); 10511541Srgrimes } else { 10521541Srgrimes MGET(n, wait, m->m_type); 105372356Sbmilekic if (n == NULL) 105472356Sbmilekic return (NULL); 10551541Srgrimes M_ALIGN(n, remain); 10561541Srgrimes } 10571541Srgrimesextpacket: 10581541Srgrimes if (m->m_flags & M_EXT) { 10591541Srgrimes n->m_flags |= M_EXT; 10601541Srgrimes n->m_ext = m->m_ext; 106164837Sdwmalone MEXT_ADD_REF(m); 10621541Srgrimes m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ 10631541Srgrimes n->m_data = m->m_data + len; 10641541Srgrimes } else { 10651541Srgrimes bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 10661541Srgrimes } 
10671541Srgrimes n->m_len = remain; 10681541Srgrimes m->m_len = len; 10691541Srgrimes n->m_next = m->m_next; 107072356Sbmilekic m->m_next = NULL; 10711541Srgrimes return (n); 10721541Srgrimes} 10731541Srgrimes/* 10741541Srgrimes * Routine to copy from device local memory into mbufs. 10751541Srgrimes */ 10761541Srgrimesstruct mbuf * 107772356Sbmilekicm_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 107872356Sbmilekic void (*copy)(char *from, caddr_t to, u_int len)) 10791541Srgrimes{ 108072356Sbmilekic struct mbuf *m; 10811541Srgrimes struct mbuf *top = 0, **mp = ⊤ 108272356Sbmilekic int off = off0, len; 108372356Sbmilekic char *cp; 10841541Srgrimes char *epkt; 10851541Srgrimes 10861541Srgrimes cp = buf; 10871541Srgrimes epkt = cp + totlen; 10881541Srgrimes if (off) { 10891541Srgrimes cp += off + 2 * sizeof(u_short); 10901541Srgrimes totlen -= 2 * sizeof(u_short); 10911541Srgrimes } 10921541Srgrimes MGETHDR(m, M_DONTWAIT, MT_DATA); 109372356Sbmilekic if (m == NULL) 109472356Sbmilekic return (NULL); 10951541Srgrimes m->m_pkthdr.rcvif = ifp; 10961541Srgrimes m->m_pkthdr.len = totlen; 10971541Srgrimes m->m_len = MHLEN; 10981541Srgrimes 10991541Srgrimes while (totlen > 0) { 11001541Srgrimes if (top) { 11011541Srgrimes MGET(m, M_DONTWAIT, MT_DATA); 110272356Sbmilekic if (m == NULL) { 11031541Srgrimes m_freem(top); 110472356Sbmilekic return (NULL); 11051541Srgrimes } 11061541Srgrimes m->m_len = MLEN; 11071541Srgrimes } 11081541Srgrimes len = min(totlen, epkt - cp); 11091541Srgrimes if (len >= MINCLSIZE) { 11101541Srgrimes MCLGET(m, M_DONTWAIT); 11111541Srgrimes if (m->m_flags & M_EXT) 11121541Srgrimes m->m_len = len = min(len, MCLBYTES); 11131541Srgrimes else 11141541Srgrimes len = m->m_len; 11151541Srgrimes } else { 11161541Srgrimes /* 11171541Srgrimes * Place initial small packet/header at end of mbuf. 
11181541Srgrimes */ 11191541Srgrimes if (len < m->m_len) { 112072356Sbmilekic if (top == NULL && len + 112172356Sbmilekic max_linkhdr <= m->m_len) 11221541Srgrimes m->m_data += max_linkhdr; 11231541Srgrimes m->m_len = len; 11241541Srgrimes } else 11251541Srgrimes len = m->m_len; 11261541Srgrimes } 11271541Srgrimes if (copy) 11281541Srgrimes copy(cp, mtod(m, caddr_t), (unsigned)len); 11291541Srgrimes else 11301541Srgrimes bcopy(cp, mtod(m, caddr_t), (unsigned)len); 11311541Srgrimes cp += len; 11321541Srgrimes *mp = m; 11331541Srgrimes mp = &m->m_next; 11341541Srgrimes totlen -= len; 11351541Srgrimes if (cp == epkt) 11361541Srgrimes cp = buf; 11371541Srgrimes } 11381541Srgrimes return (top); 11391541Srgrimes} 11403352Sphk 11413352Sphk/* 11423352Sphk * Copy data from a buffer back into the indicated mbuf chain, 11433352Sphk * starting "off" bytes from the beginning, extending the mbuf 11443352Sphk * chain if necessary. 11453352Sphk */ 11463352Sphkvoid 114772356Sbmilekicm_copyback(struct mbuf *m0, int off, int len, caddr_t cp) 11483352Sphk{ 114972356Sbmilekic int mlen; 115072356Sbmilekic struct mbuf *m = m0, *n; 11513352Sphk int totlen = 0; 11523352Sphk 115372356Sbmilekic if (m0 == NULL) 11543352Sphk return; 11553352Sphk while (off > (mlen = m->m_len)) { 11563352Sphk off -= mlen; 11573352Sphk totlen += mlen; 115872356Sbmilekic if (m->m_next == NULL) { 11593352Sphk n = m_getclr(M_DONTWAIT, m->m_type); 116072356Sbmilekic if (n == NULL) 11613352Sphk goto out; 11623352Sphk n->m_len = min(MLEN, len + off); 11633352Sphk m->m_next = n; 11643352Sphk } 11653352Sphk m = m->m_next; 11663352Sphk } 11673352Sphk while (len > 0) { 11683352Sphk mlen = min (m->m_len - off, len); 11693352Sphk bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); 11703352Sphk cp += mlen; 11713352Sphk len -= mlen; 11723352Sphk mlen += off; 11733352Sphk off = 0; 11743352Sphk totlen += mlen; 11753352Sphk if (len == 0) 11763352Sphk break; 117772356Sbmilekic if (m->m_next == NULL) { 11783352Sphk n = 
m_get(M_DONTWAIT, m->m_type); 117972356Sbmilekic if (n == NULL) 11803352Sphk break; 11813352Sphk n->m_len = min(MLEN, len); 11823352Sphk m->m_next = n; 11833352Sphk } 11843352Sphk m = m->m_next; 11853352Sphk } 11863352Sphkout: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 11873352Sphk m->m_pkthdr.len = totlen; 11883352Sphk} 118952756Sphk 119052756Sphkvoid 119152756Sphkm_print(const struct mbuf *m) 119252756Sphk{ 119352756Sphk int len; 119454906Seivind const struct mbuf *m2; 119552756Sphk 119652756Sphk len = m->m_pkthdr.len; 119752756Sphk m2 = m; 119852756Sphk while (len) { 119952756Sphk printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); 120052756Sphk len -= m2->m_len; 120152756Sphk m2 = m2->m_next; 120252756Sphk } 120352756Sphk return; 120452756Sphk} 1205